Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/CMakeLists.txt | 9
-rw-r--r--  src/jit/DIRS.proj | 4
-rw-r--r--  src/jit/block.cpp | 20
-rw-r--r--  src/jit/block.h | 161
-rwxr-xr-x  src/jit/codegen.h | 2
-rw-r--r--  src/jit/codegenarm.cpp | 1185
-rw-r--r--  src/jit/codegenarm64.cpp | 27
-rw-r--r--  src/jit/codegencommon.cpp | 151
-rw-r--r--  src/jit/codegenlegacy.cpp | 215
-rw-r--r--  src/jit/codegenlinear.cpp | 32
-rw-r--r--  src/jit/codegenlinear.h | 5
-rw-r--r--  src/jit/codegenxarch.cpp | 483
-rw-r--r--  src/jit/compiler.cpp | 83
-rw-r--r--  src/jit/compiler.h | 322
-rw-r--r--  src/jit/compiler.hpp | 68
-rw-r--r--  src/jit/compmemkind.h | 2
-rw-r--r--  src/jit/compphases.h | 6
-rw-r--r--  src/jit/decomposelongs.cpp | 165
-rw-r--r--  src/jit/decomposelongs.h | 2
-rw-r--r--  src/jit/ee_il_dll.cpp | 3
-rw-r--r--  src/jit/emit.cpp | 2
-rw-r--r--  src/jit/emit.h | 21
-rw-r--r--  src/jit/emitarm.cpp | 57
-rw-r--r--  src/jit/emitarm64.cpp | 1
-rw-r--r--  src/jit/emitxarch.cpp | 79
-rw-r--r--  src/jit/emitxarch.h | 40
-rw-r--r--  src/jit/flowgraph.cpp | 1797
-rw-r--r--  src/jit/gcencode.cpp | 11
-rw-r--r--  src/jit/gcinfo.cpp | 6
-rw-r--r--  src/jit/gentree.cpp | 315
-rw-r--r--  src/jit/gentree.h | 188
-rw-r--r--  src/jit/gschecks.cpp | 3
-rw-r--r--  src/jit/gtlist.h | 13
-rw-r--r--  src/jit/importer.cpp | 233
-rw-r--r--  src/jit/inline.def | 2
-rw-r--r--  src/jit/instr.cpp | 12
-rw-r--r--  src/jit/instrsxarch.h | 3
-rw-r--r--  src/jit/jit.h | 12
-rw-r--r--  src/jit/jit.settings.targets | 5
-rw-r--r--  src/jit/jitconfigvalues.h | 25
-rw-r--r--  src/jit/jiteh.cpp | 7
-rw-r--r--  src/jit/jiteh.h | 4
-rw-r--r--  src/jit/lclvars.cpp | 136
-rw-r--r--  src/jit/lir.cpp | 10
-rw-r--r--  src/jit/liveness.cpp | 699
-rw-r--r--  src/jit/lower.cpp | 724
-rw-r--r--  src/jit/lower.h | 23
-rw-r--r--  src/jit/lowerarm.cpp | 148
-rw-r--r--  src/jit/lowerarm64.cpp | 1690
-rw-r--r--  src/jit/lowerxarch.cpp | 3677
-rw-r--r--  src/jit/lsra.cpp | 34
-rw-r--r--  src/jit/lsraarm.cpp | 1073
-rw-r--r--  src/jit/lsraarm64.cpp | 1766
-rw-r--r--  src/jit/lsraxarch.cpp | 3684
-rw-r--r--  src/jit/morph.cpp | 567
-rw-r--r--  src/jit/optcse.cpp | 20
-rw-r--r--  src/jit/optimizer.cpp | 134
-rw-r--r--  src/jit/rationalize.cpp | 16
-rw-r--r--  src/jit/regalloc.cpp | 4
-rw-r--r--  src/jit/registerfp.cpp | 6
-rw-r--r--  src/jit/simd.cpp | 257
-rw-r--r--  src/jit/simdcodegenxarch.cpp | 161
-rw-r--r--  src/jit/simdintrinsiclist.h | 5
-rw-r--r--  src/jit/sm.cpp | 5
-rw-r--r--  src/jit/sm.h | 3
-rw-r--r--  src/jit/ssabuilder.cpp | 410
-rw-r--r--  src/jit/ssabuilder.h | 4
-rw-r--r--  src/jit/ssarenamestate.cpp | 16
-rw-r--r--  src/jit/ssarenamestate.h | 94
-rw-r--r--  src/jit/stackfp.cpp | 10
-rw-r--r--  src/jit/target.h | 20
-rw-r--r--  src/jit/unwind.cpp | 32
-rw-r--r--  src/jit/unwindx86.cpp | 249
-rw-r--r--  src/jit/valuenum.cpp | 804
-rw-r--r--  src/jit/valuenum.h | 4
-rw-r--r--  src/jit/valuenumfuncs.h | 10
-rw-r--r--  src/jit/valuenumtype.h | 4
77 files changed, 14076 insertions, 8204 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 96b8c49..db6e597 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -81,6 +81,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
codegenxarch.cpp
emitxarch.cpp
lowerxarch.cpp
+ lsraxarch.cpp
simd.cpp
simdcodegenxarch.cpp
targetamd64.cpp
@@ -92,6 +93,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM)
decomposelongs.cpp
emitarm.cpp
lowerarm.cpp
+ lsraarm.cpp
targetarm.cpp
unwindarm.cpp
)
@@ -101,15 +103,18 @@ elseif(CLR_CMAKE_TARGET_ARCH_I386)
decomposelongs.cpp
emitxarch.cpp
lowerxarch.cpp
+ lsraxarch.cpp
simd.cpp
simdcodegenxarch.cpp
targetx86.cpp
+ unwindx86.cpp
)
elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
set( ARCH_SOURCES
codegenarm64.cpp
emitarm64.cpp
lowerarm64.cpp
+ lsraarm64.cpp
targetarm64.cpp
unwindarm.cpp
unwindarm64.cpp
@@ -203,9 +208,9 @@ if (CLR_CMAKE_PLATFORM_ARCH_ARM)
add_subdirectory(protojit)
endif (CLR_CMAKE_PLATFORM_ARCH_ARM)
-if (CLR_CMAKE_PLATFORM_ARCH_I386)
+if (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
add_subdirectory(legacyjit)
if (NOT CLR_BUILD_JIT32)
add_subdirectory(compatjit)
endif ()
-endif (CLR_CMAKE_PLATFORM_ARCH_I386)
+endif (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj
index 6d1c06d..eb00cc1 100644
--- a/src/jit/DIRS.proj
+++ b/src/jit/DIRS.proj
@@ -26,8 +26,8 @@
<ItemGroup Condition="'$(BuildExePhase)' == '1' and '$(BuildProjectName)' != 'CoreSys'">
<!-- Build the "FrankenJit" (RyuJIT front-end, legacy back-end) and "FrankenAltjit". These can't conflict with the names of the JIT32 directory outputs. -->
- <ProjectFile Condition="'$(BuildArchitecture)' == 'i386' or '$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
- <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
<!-- This might be useful, to help make sure JIT devs build all configurations of the JIT (including crossgen), but
it appears to cause problems with the build system, and it slows down normal JIT developer productivity by adding a seldom-useful build.
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
index 47f1052..6d8bc34 100644
--- a/src/jit/block.cpp
+++ b/src/jit/block.cpp
@@ -365,6 +365,14 @@ void BasicBlock::dspFlags()
{
printf("KEEP ");
}
+ if (bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ printf("cfb ");
+ }
+ if (bbFlags & BBF_CLONED_FINALLY_END)
+ {
+ printf("cfe ");
+ }
}
/*****************************************************************************
@@ -564,10 +572,10 @@ void BasicBlock::dspBlockHeader(Compiler* compiler,
#endif // DEBUG
-// Allocation function for HeapPhiArg.
-void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp)
+// Allocation function for MemoryPhiArg.
+void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp)
{
- return comp->compGetMem(sz, CMK_HeapPhiArg);
+ return comp->compGetMem(sz, CMK_MemoryPhiArg);
}
//------------------------------------------------------------------------
@@ -664,7 +672,7 @@ bool BasicBlock::IsLIR()
// Return Value:
// The first statement in the block's bbTreeList.
//
-GenTreeStmt* BasicBlock::firstStmt()
+GenTreeStmt* BasicBlock::firstStmt() const
{
if (bbTreeList == nullptr)
{
@@ -683,7 +691,7 @@ GenTreeStmt* BasicBlock::firstStmt()
// Return Value:
// The last statement in the block's bbTreeList.
//
-GenTreeStmt* BasicBlock::lastStmt()
+GenTreeStmt* BasicBlock::lastStmt() const
{
if (bbTreeList == nullptr)
{
@@ -765,7 +773,7 @@ BasicBlock* BasicBlock::GetUniqueSucc()
}
// Static vars.
-BasicBlock::HeapPhiArg* BasicBlock::EmptyHeapPhiDef = (BasicBlock::HeapPhiArg*)0x1;
+BasicBlock::MemoryPhiArg* BasicBlock::EmptyMemoryPhiDef = (BasicBlock::MemoryPhiArg*)0x1;
unsigned PtrKeyFuncs<BasicBlock>::GetHashCode(const BasicBlock* ptr)
{
diff --git a/src/jit/block.h b/src/jit/block.h
index 99c0efc..786b831 100644
--- a/src/jit/block.h
+++ b/src/jit/block.h
@@ -144,6 +144,88 @@ struct EntryState
StackEntry* esStack; // ptr to stack
};
+// Enumeration of the kinds of memory whose state changes the compiler tracks
+enum MemoryKind
+{
+ ByrefExposed = 0, // Includes anything byrefs can read/write (everything in GcHeap, address-taken locals,
+ // unmanaged heap, callers' locals, etc.)
+ GcHeap, // Includes actual GC heap, and also static fields
+ MemoryKindCount, // Number of MemoryKinds
+};
+#ifdef DEBUG
+const char* const memoryKindNames[] = {"ByrefExposed", "GcHeap"};
+#endif // DEBUG
+
+// Bitmask describing a set of memory kinds (usable in bitfields)
+typedef unsigned int MemoryKindSet;
+
+// Bitmask for a MemoryKindSet containing just the specified MemoryKind
+inline MemoryKindSet memoryKindSet(MemoryKind memoryKind)
+{
+ return (1U << memoryKind);
+}
+
+// Bitmask for a MemoryKindSet containing the specified MemoryKinds
+template <typename... MemoryKinds>
+inline MemoryKindSet memoryKindSet(MemoryKind memoryKind, MemoryKinds... memoryKinds)
+{
+ return memoryKindSet(memoryKind) | memoryKindSet(memoryKinds...);
+}
+
+// Bitmask containing all the MemoryKinds
+const MemoryKindSet fullMemoryKindSet = (1 << MemoryKindCount) - 1;
+
+// Bitmask containing no MemoryKinds
+const MemoryKindSet emptyMemoryKindSet = 0;
+
+// Standard iterator class for iterating through MemoryKinds
+class MemoryKindIterator
+{
+ int value;
+
+public:
+ explicit inline MemoryKindIterator(int val) : value(val)
+ {
+ }
+ inline MemoryKindIterator& operator++()
+ {
+ ++value;
+ return *this;
+ }
+ inline MemoryKindIterator operator++(int)
+ {
+ return MemoryKindIterator(value++);
+ }
+ inline MemoryKind operator*()
+ {
+ return static_cast<MemoryKind>(value);
+ }
+ friend bool operator==(const MemoryKindIterator& left, const MemoryKindIterator& right)
+ {
+ return left.value == right.value;
+ }
+ friend bool operator!=(const MemoryKindIterator& left, const MemoryKindIterator& right)
+ {
+ return left.value != right.value;
+ }
+};
+
+// Empty struct that allows enumerating memory kinds via `for(MemoryKind kind : allMemoryKinds())`
+struct allMemoryKinds
+{
+ inline allMemoryKinds()
+ {
+ }
+ inline MemoryKindIterator begin()
+ {
+ return MemoryKindIterator(0);
+ }
+ inline MemoryKindIterator end()
+ {
+ return MemoryKindIterator(MemoryKindCount);
+ }
+};
+
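An illustrative usage sketch of the helpers introduced above (not part of the change; it assumes the declarations in this hunk plus <cstdio>, and memoryKindNames is DEBUG-only):

    MemoryKindSet mutated = memoryKindSet(ByrefExposed, GcHeap); // same bits as fullMemoryKindSet
    for (MemoryKind kind : allMemoryKinds())
    {
        if ((mutated & memoryKindSet(kind)) != 0)
        {
            printf("%s\n", memoryKindNames[kind]); // prints "ByrefExposed" then "GcHeap"
        }
    }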
// This encapsulates the "exception handling" successors of a block. That is,
// if a basic block BB1 occurs in a try block, we consider the first basic block
// BB2 of the corresponding handler to be an "EH successor" of BB1. Because we
@@ -353,15 +435,18 @@ struct BasicBlock : private LIR::Range
// BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
// finally.
+#define BBF_CLONED_FINALLY_BEGIN 0x100000000 // First block of a cloned finally region
+#define BBF_CLONED_FINALLY_END 0x200000000 // Last block of a cloned finally region
+
// Flags that relate blocks to loop structure.
#define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1)
- bool isRunRarely()
+ bool isRunRarely() const
{
return ((bbFlags & BBF_RUN_RARELY) != 0);
}
- bool isLoopHead()
+ bool isLoopHead() const
{
return ((bbFlags & BBF_LOOP_HEAD) != 0);
}
@@ -388,7 +473,7 @@ struct BasicBlock : private LIR::Range
// For example, the top block might or might not have BBF_GC_SAFE_POINT,
// but we assume it does not have BBF_GC_SAFE_POINT any more.
-#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS)
+#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END)
// Flags gained by the bottom block when a block is split.
// Note, this is a conservative guess.
@@ -399,7 +484,7 @@ struct BasicBlock : private LIR::Range
#define BBF_SPLIT_GAINED \
(BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \
- BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS)
+ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END)
#ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope
static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0);
@@ -801,64 +886,46 @@ struct BasicBlock : private LIR::Range
VARSET_TP bbVarUse; // variables used by block (before an assignment)
VARSET_TP bbVarDef; // variables assigned by block (before a use)
- VARSET_TP bbVarTmp; // TEMP: only used by FP enregistering code!
VARSET_TP bbLiveIn; // variables live on entry
VARSET_TP bbLiveOut; // variables live on exit
- // Use, def, live in/out information for the implicit "Heap" variable.
- unsigned bbHeapUse : 1;
- unsigned bbHeapDef : 1;
- unsigned bbHeapLiveIn : 1;
- unsigned bbHeapLiveOut : 1;
- unsigned bbHeapHavoc : 1; // If true, at some point the block does an operation that leaves the heap
- // in an unknown state. (E.g., unanalyzed call, store through unknown
- // pointer...)
+ // Use, def, live in/out information for the implicit memory variable.
+ MemoryKindSet bbMemoryUse : MemoryKindCount; // must be set for any MemoryKinds this block references
+ MemoryKindSet bbMemoryDef : MemoryKindCount; // must be set for any MemoryKinds this block mutates
+ MemoryKindSet bbMemoryLiveIn : MemoryKindCount;
+ MemoryKindSet bbMemoryLiveOut : MemoryKindCount;
+ MemoryKindSet bbMemoryHavoc : MemoryKindCount; // If true, at some point the block does an operation
+ // that leaves memory in an unknown state. (E.g.,
+ // unanalyzed call, store through unknown pointer...)
- // We want to make phi functions for the special implicit var "Heap". But since this is not a real
+ // We want to make phi functions for the special implicit var memory. But since this is not a real
// lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
- struct HeapPhiArg
+ struct MemoryPhiArg
{
- bool m_isSsaNum; // If true, the phi arg is an SSA # for an internal try block heap state, being
- // added to the phi of a catch block. If false, it's a pred block.
- union {
- BasicBlock* m_predBB; // Predecessor block from which the SSA # flows.
- unsigned m_ssaNum; // SSA# for internal block heap state.
- };
- HeapPhiArg* m_nextArg; // Next arg in the list, else NULL.
+ unsigned m_ssaNum; // SSA# for incoming value.
+ MemoryPhiArg* m_nextArg; // Next arg in the list, else NULL.
unsigned GetSsaNum()
{
- if (m_isSsaNum)
- {
- return m_ssaNum;
- }
- else
- {
- assert(m_predBB != nullptr);
- return m_predBB->bbHeapSsaNumOut;
- }
+ return m_ssaNum;
}
- HeapPhiArg(BasicBlock* predBB, HeapPhiArg* nextArg = nullptr)
- : m_isSsaNum(false), m_predBB(predBB), m_nextArg(nextArg)
- {
- }
- HeapPhiArg(unsigned ssaNum, HeapPhiArg* nextArg = nullptr)
- : m_isSsaNum(true), m_ssaNum(ssaNum), m_nextArg(nextArg)
+ MemoryPhiArg(unsigned ssaNum, MemoryPhiArg* nextArg = nullptr) : m_ssaNum(ssaNum), m_nextArg(nextArg)
{
}
void* operator new(size_t sz, class Compiler* comp);
};
- static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
- // for Heap.
- HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
- // Otherwise, it is either the special value EmptyHeapPhiDefn, to indicate
- // that Heap needs a phi definition on entry, or else it is the linked list
- // of the phi arguments.
- unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
- unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
+ static MemoryPhiArg* EmptyMemoryPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
+ // for memory.
+ MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" memory SSA var is not a phi definition, this value
+ // is NULL.
+ // Otherwise, it is either the special value EmptyMemoryPhiDef, to indicate
+ // that memory needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
+ unsigned bbMemorySsaNumIn[MemoryKindCount]; // The SSA # of memory on entry to the block.
+ unsigned bbMemorySsaNumOut[MemoryKindCount]; // The SSA # of memory on exit from the block.
VARSET_TP bbScope; // variables in scope over the block
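Illustrative sketch (not part of the change) of how a per-kind phi argument list can be extended using only the members declared above; the helper name is hypothetical, and real SSA construction also has to handle the EmptyMemoryPhiDef sentinel, which this sketch ignores:

    // Prepend an incoming SSA number to the GcHeap phi list of 'block'.
    void addGcHeapPhiArg(Compiler* comp, BasicBlock* block, unsigned ssaNum)
    {
        BasicBlock::MemoryPhiArg*& phi = block->bbMemorySsaPhiFunc[GcHeap];
        // MemoryPhiArg::operator new(size_t, Compiler*) allocates from the CMK_MemoryPhiArg pool.
        phi = new (comp) BasicBlock::MemoryPhiArg(ssaNum, phi);
    }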
@@ -981,8 +1048,8 @@ struct BasicBlock : private LIR::Range
return bbNum - 1;
}
- GenTreeStmt* firstStmt();
- GenTreeStmt* lastStmt();
+ GenTreeStmt* firstStmt() const;
+ GenTreeStmt* lastStmt() const;
GenTreeStmt* lastTopLevelStmt();
GenTree* firstNode();
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index c6e38ab..090283e 100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -390,6 +390,8 @@ protected:
// Save/Restore callee saved float regs to stack
void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize);
void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize);
+ // Generate VZeroupper instruction to avoid AVX/SSE transition penalty
+ void genVzeroupperIfNeeded(bool check256bitOnly = true);
#endif // _TARGET_XARCH_ && FEATURE_STACK_FP_X87
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 73e51f2..81f5889 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -23,15 +23,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfo.h"
#include "emit.h"
-#ifndef JIT32_GCENCODER
-#include "gcinfoencoder.h"
-#endif
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
+//------------------------------------------------------------------------
+// genSetRegToIcon: Generate code that will set the given register to the integer constant.
+//
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
// Reg cannot be a FP reg
@@ -42,41 +36,78 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla
// code path.
noway_assert(type != TYP_REF || val == 0);
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
- else
- {
- // TODO-CQ: needs all the optimized cases
- getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
- }
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
}
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around code sequence.
- * Otherwise, ECX maybe modified.
- */
+//------------------------------------------------------------------------
+// genEmitGSCookieCheck: Generate code to check that the GS cookie wasn't thrashed by a buffer overrun.
+//
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
NYI("ARM genEmitGSCookieCheck");
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+//------------------------------------------------------------------------
+// genCallFinally: Generate a call to the finally block.
+//
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
NYI("ARM genCallFinally");
return block;
}
-// move an immediate value into an integer register
-
+//------------------------------------------------------------------------
+// genEHCatchRet: Generate code for an EH catch return (NYI for ARM).
void CodeGen::genEHCatchRet(BasicBlock* block)
{
NYI("ARM genEHCatchRet");
}
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics.
+ //
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_vabs, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ NYI_ARM("genIntrinsic for round - not implemented yet");
+ break;
+
+ case CORINFO_INTRINSIC_Sqrt:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_vsqrt, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// instGen_Set_Reg_To_Imm: Move an immediate value into an integer register.
+//
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
// reg cannot be a FP register
@@ -87,23 +118,60 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm,
size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
}
- if ((imm == 0) && !EA_IS_RELOC(size))
+ if (EA_IS_RELOC(size))
+ {
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm);
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm);
+ }
+ else if (imm == 0)
{
instGen_Set_Reg_To_Zero(size, reg, flags);
}
else
{
- getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ if (arm_Valid_Imm_For_Mov(imm))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm, flags);
+ }
+ else // We have to use a movw/movt pair of instructions
+ {
+ ssize_t imm_lo16 = (imm & 0xffff);
+ ssize_t imm_hi16 = (imm >> 16) & 0xffff;
+
+ assert(arm_Valid_Imm_For_Mov(imm_lo16));
+ assert(imm_hi16 != 0);
+
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm_lo16);
+
+ // If we've got a low register, the high word is all bits set,
+ // and the high bit of the low word is set, we can sign extend
+ // halfword and save two bytes of encoding. This can happen for
+ // small magnitude negative numbers 'n' for -32768 <= n <= -1.
+
+ if (getEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000))
+ {
+ getEmitter()->emitIns_R_R(INS_sxth, EA_2BYTE, reg, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm_hi16);
+ }
+
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+ }
}
+
regTracker.rsTrackRegIntCns(reg, imm);
}
-/*****************************************************************************
- *
- * Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
- */
+//------------------------------------------------------------------------
+// genSetRegToConst: Generate code to set a register 'targetReg' of type 'targetType'
+// to the constant specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'.
+//
+// Notes:
+// This does not call genProduceReg() on the target register.
+//
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
{
switch (tree->gtOper)
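A worked example (illustrative values only, not part of the change) of the movw/movt decomposition in the hunk above, assuming a 32-bit int:

    int imm      = -5;                    // 0xFFFFFFFB
    int imm_lo16 = imm & 0xffff;          // 0xFFFB
    int imm_hi16 = (imm >> 16) & 0xffff;  // 0xFFFF
    // imm_hi16 == 0xffff and bit 15 of imm_lo16 is set, so for a low register the code
    // emits "movw reg, #0xFFFB" followed by "sxth reg, reg" (sign extension reproduces
    // 0xFFFFFFFB) instead of a movw/movt pair, saving two bytes of Thumb-2 encoding.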
@@ -130,7 +198,42 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
case GT_CNS_DBL:
{
- NYI("GT_CNS_DBL");
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+ // TODO-ARM-CQ: Do we have a faster/smaller way to generate 0.0 in thumb2 ISA ?
+ if (targetType == TYP_FLOAT)
+ {
+ // Get a temp integer register
+ regMaskTP tmpRegMask = tree->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg != REG_NA);
+
+ float f = forceCastToFloat(constValue);
+ genSetRegToIcon(tmpReg, *((int*)(&f)));
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, targetReg, tmpReg);
+ }
+ else
+ {
+ assert(targetType == TYP_DOUBLE);
+
+ unsigned* cv = (unsigned*)&constValue;
+
+ // Get two temp integer registers
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regMaskTP tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask
+ regNumber tmpReg1 = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg1 != REG_NA);
+
+ tmpRegsMask &= ~genRegMask(tmpReg1); // remove the bit for 'tmpReg1'
+ tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg2 != REG_NA);
+
+ genSetRegToIcon(tmpReg1, cv[0]);
+ genSetRegToIcon(tmpReg2, cv[1]);
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, targetReg, tmpReg1, tmpReg2);
+ }
}
break;
@@ -139,18 +242,22 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
}
}
-/*****************************************************************************
- *
- * Generate code for a single node in the tree.
- * Preconditions: All operands have been evaluated
- *
- */
+//------------------------------------------------------------------------
+// genCodeForTreeNode: Generate code for a single node in the tree.
+//
+// Preconditions:
+// All operands have been evaluated.
+//
void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
regNumber targetReg = treeNode->gtRegNum;
var_types targetType = treeNode->TypeGet();
emitter* emit = getEmitter();
+#ifdef DEBUG
+ lastConsumedNode = nullptr;
+#endif
+
JITDUMP("Generating: ");
DISPNODE(treeNode);
@@ -169,10 +276,33 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genProduceReg(treeNode);
break;
- case GT_NEG:
case GT_NOT:
+ assert(!varTypeIsFloating(targetType));
+
+ __fallthrough;
+
+ case GT_NEG:
{
- NYI("GT_NEG and GT_NOT");
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!treeNode->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (ins == INS_vneg)
+ {
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitTypeSize(treeNode), targetReg, operandReg, 0);
+ }
}
genProduceReg(treeNode);
break;
@@ -185,9 +315,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_ADD:
case GT_SUB:
+ case GT_MUL:
{
const genTreeOps oper = treeNode->OperGet();
- if ((oper == GT_ADD || oper == GT_SUB) && treeNode->gtOverflow())
+ if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow())
{
// This is also checked in the importer.
NYI("Overflow not yet implemented");
@@ -209,40 +340,47 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genConsumeIfReg(op1);
genConsumeIfReg(op2);
- // This is the case of reg1 = reg1 op reg2
- // We're ready to emit the instruction without any moves
- if (op1reg == targetReg)
+ if (!varTypeIsFloating(targetType))
{
- dst = op1;
- src = op2;
- }
- // We have reg1 = reg2 op reg1
- // In order for this operation to be correct
- // we need that op is a commutative operation so
- // we can convert it into reg1 = reg1 op reg2 and emit
- // the same code as above
- else if (op2reg == targetReg)
- {
- noway_assert(GenTree::OperIsCommutative(treeNode->OperGet()));
- dst = op2;
- src = op1;
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+ // In order for this operation to be correct
+ // we need that op is a commutative operation so
+ // we can convert it into reg1 = reg1 op reg2 and emit
+ // the same code as above
+ else if (op2reg == targetReg)
+ {
+ assert(GenTree::OperIsCommutative(treeNode->OperGet()));
+ dst = op2;
+ src = op1;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ assert(r == targetReg);
}
- // dest, op1 and op2 registers are different:
- // reg3 = reg1 op reg2
- // We can implement this by issuing a mov:
- // reg3 = reg1
- // reg3 = reg3 op reg2
else
{
- inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
- regTracker.rsTrackRegCopy(targetReg, op1reg);
- gcInfo.gcMarkRegPtrVal(targetReg, targetType);
- dst = treeNode;
- src = op2;
+ emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), targetReg, op1reg, op2reg);
}
-
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
- noway_assert(r == targetReg);
}
genProduceReg(treeNode);
break;
@@ -429,17 +567,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_IND:
+ genConsumeAddress(treeNode->AsIndir()->Addr());
emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
genProduceReg(treeNode);
break;
- case GT_MUL:
- {
- NYI("GT_MUL");
- }
- genProduceReg(treeNode);
- break;
-
case GT_MOD:
case GT_UDIV:
case GT_UMOD:
@@ -451,17 +583,45 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_DIV:
{
- NYI("GT_DIV");
- }
+ genConsumeOperands(treeNode->AsOp());
+
+ noway_assert(targetReg != REG_NA);
+
+ GenTreePtr dst = treeNode;
+ GenTreePtr src1 = treeNode->gtGetOp1();
+ GenTreePtr src2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ emitAttr attr = emitTypeSize(treeNode);
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can be only reg
+ assert(!src1->isContained() || !src2->isContained());
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ else // a signed integer divide operation
+ {
+ // TODO-ARM-Bug: handle zero division exception.
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
genProduceReg(treeNode);
- break;
+ }
+ break;
case GT_INTRINSIC:
{
- NYI("GT_INTRINSIC");
+ genIntrinsic(treeNode);
}
- genProduceReg(treeNode);
- break;
+ break;
case GT_EQ:
case GT_NE:
@@ -485,26 +645,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emitAttr cmpAttr;
if (varTypeIsFloating(op1))
{
- NYI("Floating point compare");
-
- bool isUnordered = ((treeNode->gtFlags & GTF_RELOP_NAN_UN) != 0);
- switch (tree->OperGet())
- {
- case GT_EQ:
- ins = INS_beq;
- case GT_NE:
- ins = INS_bne;
- case GT_LT:
- ins = isUnordered ? INS_blt : INS_blo;
- case GT_LE:
- ins = isUnordered ? INS_ble : INS_bls;
- case GT_GE:
- ins = isUnordered ? INS_bpl : INS_bge;
- case GT_GT:
- ins = isUnordered ? INS_bhi : INS_bgt;
- default:
- unreached();
- }
+ assert(op1->TypeGet() == op2->TypeGet());
+ ins = INS_vcmp;
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ // vmrs with register 0xf has special meaning of transferring flags
+ emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
}
else
{
@@ -522,12 +668,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
- assert(!op1->isContainedMemoryOp() || op1Type == op2Type);
- assert(!op2->isContainedMemoryOp() || op1Type == op2Type);
+ assert(!op1->isUsedFromMemory() || op1Type == op2Type);
+ assert(!op2->isUsedFromMemory() || op1Type == op2Type);
cmpAttr = emitTypeSize(cmpType);
}
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
}
- emit->emitInsBinary(ins, cmpAttr, op1, op2);
// Are we evaluating this into a register?
if (targetReg != REG_NA)
@@ -579,7 +725,68 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STOREIND:
{
- NYI("GT_STOREIND");
+ GenTreeStoreInd* storeInd = treeNode->AsStoreInd();
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+ var_types targetType = storeInd->TypeGet();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(storeInd->AsOp());
+
+#if NOGC_WRITE_BARRIERS
+ NYI_ARM("NOGC_WRITE_BARRIERS");
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
+ genGCWriteBarrier(storeInd, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ }
}
break;
@@ -682,7 +889,14 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_NO_OP:
- NYI("GT_NO_OP");
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
break;
case GT_ARR_BOUNDS_CHECK:
@@ -733,13 +947,22 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
break;
+ case GT_CLS_VAR_ADDR:
+ emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
default:
{
#ifdef DEBUG
char message[256];
_snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n",
GenTree::NodeName(treeNode->OperGet()));
- notYetImplemented(message, __FILE__, __LINE__);
+ NYIRAW(message);
#else
NYI("unimplemented node");
#endif
@@ -748,24 +971,33 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for the locked operations.
+//
+// Notes:
+// Handles GT_LOCKADD, GT_XCHG, GT_XADD nodes.
+//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
NYI("genLockedInstructions");
}
-// generate code for GT_ARR_BOUNDS_CHECK node
+//------------------------------------------------------------------------
+// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
+//
void CodeGen::genRangeCheck(GenTreePtr oper)
{
noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
- GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal();
+ GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
GenTreePtr arrRef = NULL;
int lenOffset = 0;
+ genConsumeIfReg(arrIdx);
+ genConsumeIfReg(arrLen);
+
GenTree * src1, *src2;
emitJumpKind jmpKind;
@@ -784,15 +1016,13 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
}
- genConsumeIfReg(src1);
- genConsumeIfReg(src2);
-
getEmitter()->emitInsBinary(INS_cmp, emitAttr(TYP_INT), src1, src2);
genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
}
-// make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen
+//------------------------------------------------------------------------
+// indirForm: Make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen.
//
GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
{
@@ -804,8 +1034,9 @@ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
return i;
}
-// make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate
+//------------------------------------------------------------------------
+// intForm: Make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate.
//
GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
{
@@ -817,6 +1048,9 @@ GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
return i;
}
+//------------------------------------------------------------------------
+// genGetInsForOper: Return instruction encoding of the operation tree.
+//
instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
{
instruction ins;
@@ -835,6 +1069,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_MUL:
ins = INS_MUL;
break;
+ case GT_DIV:
+ ins = INS_sdiv;
+ break;
case GT_LSH:
ins = INS_SHIFT_LEFT_LOGICAL;
break;
@@ -878,21 +1115,331 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
//
void CodeGen::genCodeForShift(GenTreePtr tree)
{
- NYI("genCodeForShift");
+ var_types targetType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(tree);
+
+ assert(tree->gtRegNum != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ genConsumeReg(operand);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ genConsumeReg(shiftBy);
+ getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ unsigned immWidth = size * BITS_PER_BYTE;
+ ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
+
+ getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ }
+
+ genProduceReg(tree);
}
+//------------------------------------------------------------------------
+// genRegCopy: Generate a register copy.
+//
void CodeGen::genRegCopy(GenTree* treeNode)
{
NYI("genRegCopy");
}
-// Produce code for a GT_CALL node
+//------------------------------------------------------------------------
+// genCallInstruction: Produce code for a GT_CALL node
+//
void CodeGen::genCallInstruction(GenTreePtr node)
{
- NYI("Call not implemented");
+ GenTreeCall* call = node->AsCall();
+
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+ // In the case of a varargs call,
+ // the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the
+ // integer and floating point registers so, let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ const regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+ // If fast tail call, then we are done.
+ if (call->IsFastTailCall())
+ {
+ NYI_ARM("fast tail call");
+ }
+
+ // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+
+ if (call->HasMultiRegRetVal())
+ {
+ NYI_ARM("has multi reg ret val");
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+
+ if (target != nullptr)
+ {
+ // For ARM a call target can not be a contained indirection
+ assert(!target->isContainedIndir());
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ retSize, ilOffset, target->gtRegNum);
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if ((call->NeedsNullCheck()) == 0)
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+
+ assert(addr);
+ // Non-virtual direct call to known addresses
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
+ genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
+ }
+ else
+ {
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset);
+ }
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(pRetTypeDesc != nullptr);
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = pRetTypeDesc->GetReturnRegType(i);
+ returnReg = pRetTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+ }
+ else
+ {
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
}
-// produce code for a GT_LEA subnode
+//------------------------------------------------------------------------
+// genLeaInstruction: Produce code for a GT_LEA subnode.
+//
void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
{
if (lea->Base() && lea->Index())
@@ -909,12 +1456,44 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
genProduceReg(lea);
}
-// Generate code to materialize a condition into a register
-// (the condition codes must already have been appropriately set)
-
+//------------------------------------------------------------------------
+// genSetRegToCond: Generate code to materialize a condition into a register.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Preconditions:
+// The condition codes must already have been appropriately set.
+//
void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
{
- NYI("genSetRegToCond");
+ // Emit code like this:
+ // ...
+ // bgt True
+ // movs rD, #0
+ // b Next
+ // True:
+ // movs rD, #1
+ // Next:
+ // ...
+
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jmpKind = genJumpKindForOper(tree->gtOper, compareKind);
+
+ BasicBlock* labelTrue = genCreateTempLabel();
+ getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmpKind), labelTrue);
+
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), dstReg, 0);
+
+ BasicBlock* labelNext = genCreateTempLabel();
+ getEmitter()->emitIns_J(INS_b, labelNext);
+
+ genDefineTempLabel(labelTrue);
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), dstReg, 1);
+ genDefineTempLabel(labelNext);
}
//------------------------------------------------------------------------
@@ -933,7 +1512,85 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
//
void CodeGen::genIntToIntCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter* emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+ regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ genConsumeReg(castOp);
+ Lowering::CastInfo castInfo;
+
+ // Get information about the cast.
+ Lowering::getCastDescription(treeNode, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ NYI_ARM("CodeGen::genIntToIntCast for OverflowCheck");
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType = TYP_UNKNOWN;
+
+ // If we need to treat a signed type as unsigned
+ if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ extendType = genUnsignedType(srcType);
+ movSize = emitTypeSize(extendType);
+ movRequired = true;
+ }
+ else
+ {
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+ movSize = emitTypeSize(srcType);
+ if (srcType == TYP_UINT)
+ {
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+ movSize = emitTypeSize(dstType);
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ // We should never be generating a load from memory instruction here!
+ assert(!emit->emitInsIsLoad(ins));
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
}
//------------------------------------------------------------------------
@@ -952,7 +1609,39 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode)
//
void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+ if (srcType != dstType)
+ {
+ instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
+ : INS_vcvt_d2f; // convert Double to Float
+
+ getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+ genProduceReg(treeNode);
}
//------------------------------------------------------------------------
@@ -971,7 +1660,69 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
//
void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ // int --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ // We should never see a srcType whose size is neither EA_4BYTE nor EA_8BYTE
+ // For conversions from small types (byte/sbyte/int16/uint16) to float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
+
+ instruction insVcvt = INS_invalid;
+
+ if (dstType == TYP_DOUBLE)
+ {
+ if (srcSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(srcType)) ? INS_vcvt_u2d : INS_vcvt_i2d;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ NYI_ARM("Casting int64/uint64 to double in genIntToFloatCast");
+ }
+ }
+ else
+ {
+ assert(dstType == TYP_FLOAT);
+ if (srcSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(srcType)) ? INS_vcvt_u2f : INS_vcvt_i2f;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ NYI_ARM("Casting int64/uint64 to float in genIntToFloatCast");
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ assert(insVcvt != INS_invalid);
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, srcSize, treeNode->gtRegNum, op1->gtRegNum);
+ getEmitter()->emitIns_R_R(insVcvt, srcSize, treeNode->gtRegNum, treeNode->gtRegNum);
+
+ genProduceReg(treeNode);
}
//------------------------------------------------------------------------
@@ -990,31 +1741,72 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
//
void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
{
- NYI("Cast");
-}
+ // we don't expect to see overflow detecting float/double --> int type conversions here
+ // as they should have been converted into helper calls by front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef JIT32_GCENCODER
-void*
-#else
-void
-#endif
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif
-}
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+ // We should never see a dstType whose size is neither EA_4BYTE nor EA_8BYTE
+ // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
+
+ instruction insVcvt = INS_invalid;
+
+ if (srcType == TYP_DOUBLE)
+ {
+ if (dstSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(dstType)) ? INS_vcvt_d2u : INS_vcvt_d2i;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ NYI_ARM("Casting double to int64/uint64 in genIntToFloatCast");
+ }
+ }
+ else
+ {
+ assert(srcType == TYP_FLOAT);
+ if (dstSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(dstType)) ? INS_vcvt_f2u : INS_vcvt_f2i;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ NYI_ARM("Casting float to int64/uint64 in genIntToFloatCast");
+ }
+ }
-// TODO-ARM-Cleanup: It seems that the ARM JIT (classic and otherwise) uses this method, so it seems to be
-// inappropriately named?
+ genConsumeOperands(treeNode->AsOp());
+
+ assert(insVcvt != INS_invalid);
+ getEmitter()->emitIns_R_R(insVcvt, dstSize, op1->gtRegNum, op1->gtRegNum);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, dstSize, treeNode->gtRegNum, op1->gtRegNum);
+
+ genProduceReg(treeNode);
+}
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
{
IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
@@ -1039,20 +1831,73 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
}
-/*****************************************************************************
- * Emit a call to a helper function.
- */
-
-void CodeGen::genEmitHelperCall(unsigned helper,
- int argSize,
- emitAttr retSize
-#ifndef LEGACY_BACKEND
- ,
- regNumber callTargetReg /*= REG_NA */
-#endif // !LEGACY_BACKEND
- )
+//------------------------------------------------------------------------
+// genEmitHelperCall: Emit a call to a helper function.
+//
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
{
- NYI("Helper call");
+ // Can we call the helper function directly?
+
+ void *addr = NULL, **pAddr = NULL;
+
+#if defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
+ if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
+ (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
+ helper == CORINFO_HELP_PROF_FCN_TAILCALL))
+ {
+ addr = compiler->compProfilerMethHnd;
+ }
+ else
+#endif
+ {
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
+ }
+
+ if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ }
+
+ // Load the address into a register and call through a register
+ if (addr)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, callTargetReg, (ssize_t)addr);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(callTargetReg);
+ }
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ callTargetReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ else
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+
+ regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
+ regTracker.rsTrashRegsForGCInterruptability();
}
#endif // _TARGET_ARM_
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index cc7c5dc..71c6dd1 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -1326,7 +1326,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
genDefineTempLabel(gsCheckBlk);
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
// Generate a call to the finally, like this:
// mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
@@ -1387,8 +1387,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
if (!(block->bbFlags & BBF_RETLESS_CALL))
{
assert(block->isBBCallAlwaysPair());
-
- lblk = block;
block = block->bbNext;
}
return block;
@@ -1918,6 +1916,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emitter* emit = getEmitter();
#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
if (compiler->verbose)
{
unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
@@ -2262,7 +2264,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
assert(!data->isContained());
- genConsumeReg(data);
dataReg = data->gtRegNum;
}
assert(dataReg != REG_NA);
@@ -2314,7 +2315,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
assert(!data->isContained());
- genConsumeReg(data);
dataReg = data->gtRegNum;
}
assert(dataReg != REG_NA);
@@ -2423,8 +2423,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
+ assert(!op1->isUsedFromMemory());
+ assert(!op2->isUsedFromMemory());
genConsumeOperands(tree);
@@ -3798,8 +3798,8 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTree * src1, *src2;
emitJumpKind jmpKind;
- genConsumeRegs(arrLen);
genConsumeRegs(arrIndex);
+ genConsumeRegs(arrLen);
if (arrIndex->isContainedIntOrIImmed())
{
@@ -3951,14 +3951,14 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
if (!offsetNode->IsIntegralConst(0))
{
- emitter* emit = getEmitter();
- GenTreePtr arrObj = arrOffset->gtArrObj;
- regNumber arrReg = genConsumeReg(arrObj);
- noway_assert(arrReg != REG_NA);
+ emitter* emit = getEmitter();
regNumber offsetReg = genConsumeReg(offsetNode);
noway_assert(offsetReg != REG_NA);
regNumber indexReg = genConsumeReg(indexNode);
noway_assert(indexReg != REG_NA);
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+ regNumber arrReg = genConsumeReg(arrObj);
+ noway_assert(arrReg != REG_NA);
regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
regNumber tmpReg = genRegNumFromMask(tmpRegMask);
noway_assert(tmpReg != REG_NA);
@@ -4118,12 +4118,11 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
assert(tree->gtRegNum != REG_NA);
GenTreePtr operand = tree->gtGetOp1();
- genConsumeReg(operand);
+ genConsumeOperands(tree->AsOp());
GenTreePtr shiftBy = tree->gtGetOp2();
if (!shiftBy->IsCnsIntOrI())
{
- genConsumeReg(shiftBy);
getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
}
else
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 2409115..b1e474b 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -2475,6 +2475,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_jle, // GT_LE
EJ_jge, // GT_GE
EJ_jg, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_je, // GT_TEST_EQ
+ EJ_jne, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
@@ -2494,6 +2498,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_jbe, // GT_LE
EJ_jae, // GT_GE
EJ_ja, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_je, // GT_TEST_EQ
+ EJ_jne, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
@@ -2513,6 +2521,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_NONE, // GT_LE
EJ_jns, // GT_GE (S == 0)
EJ_NONE, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_NONE, // GT_TEST_EQ
+ EJ_NONE, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ (Z == 1)
EJ_ne, // GT_NE (Z == 0)
@@ -2530,6 +2542,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
+#ifndef LEGACY_BACKEND
+ assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
+#endif
assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
@@ -2537,6 +2553,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
+#ifndef LEGACY_BACKEND
+ assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
+#endif
assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
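The new GT_TEST_EQ/GT_TEST_NE rows correspond to test-style relops, i.e. comparisons of a masked value against zero; a minimal sketch of the C-level pattern they stand for (the helper names below are illustrative only):

// (x & mask) == 0  maps to GT_TEST_EQ and, on xarch, to "test x, mask" followed by je
// (x & mask) != 0  maps to GT_TEST_NE and, on xarch, to "test x, mask" followed by jne
bool AllMaskBitsClear(unsigned x, unsigned mask) { return (x & mask) == 0; }
bool AnyMaskBitSet(unsigned x, unsigned mask)    { return (x & mask) != 0; }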
@@ -3145,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
- noway_assert(getEmitter()->emitMaxStackDepth <=
- (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
- compiler->compHndBBtabCount + // Return address for locally-called finallys
- genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
- (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+ {
+ unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
+ compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
+#if defined(UNIX_X86_ABI)
+ maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding
#endif
+ noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
+ }
+#endif // EMIT_TRACK_STACK_DEPTH
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
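As a worked instance of the relaxed bound above (the numbers are illustrative, not taken from a real method):

constexpr unsigned ptrArgCntMax    = 6; // fgPtrArgCntMax: max pointer-sized stack arguments
constexpr unsigned hndBBtabCount   = 2; // compHndBBtabCount: return addresses for locally-called finallys
constexpr unsigned longStackSlots  = 2; // genTypeStSz(TYP_LONG) on a 32-bit target
constexpr unsigned tailCallSlots   = 4; // CORINFO_HELP_TAILCALL args when compTailCallUsed is true
constexpr unsigned maxAllowedDepth = ptrArgCntMax + hndBBtabCount + longStackSlots + tailCallSlots;
static_assert(maxAllowedDepth == 14, "worked example");
// Under UNIX_X86_ABI the bound grows by up to genTypeStSz(TYP_INT) * 3 = 3 more slots of alignment padding.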
@@ -10241,6 +10266,66 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// Look in CodeGenArm64.cpp
+#elif defined(_TARGET_X86_)
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletProlog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ compiler->unwindBegProlog();
+
+ // TODO Save callee-saved registers
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // TODO Restore callee-saved registers
+
+ instGen_Return(0);
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ {
+ return;
+ }
+}
+
#else // _TARGET_*
/*****************************************************************************
@@ -10583,6 +10668,7 @@ GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTree
// funclet frames: this will be FuncletInfo.fiSpDelta.
void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
{
+ genVzeroupperIfNeeded(false);
regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
// Only callee saved floating point registers should be in regMask
@@ -10621,16 +10707,6 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
offset -= XMM_REGSIZE_BYTES;
}
}
-
-#ifdef FEATURE_AVX_SUPPORT
- // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
- // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
- // using SSE2.
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
- {
- instGen(INS_vzeroupper);
- }
-#endif
}
// Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
@@ -10651,6 +10727,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
// fast path return
if (regMask == RBM_NONE)
{
+ genVzeroupperIfNeeded();
return;
}
@@ -10682,16 +10759,6 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
assert((offset % 16) == 0);
#endif // _TARGET_AMD64_
-#ifdef FEATURE_AVX_SUPPORT
- // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
- // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
- // using SSE2.
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
- {
- instGen(INS_vzeroupper);
- }
-#endif
-
for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
{
regMaskTP regBit = genRegMask(reg);
@@ -10706,7 +10773,41 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
offset -= XMM_REGSIZE_BYTES;
}
}
+ genVzeroupperIfNeeded();
}
+
+// Generate a VZEROUPPER instruction as needed to zero out the upper 128 bits of all YMM registers, so that
+// AVX/legacy-SSE transition penalties can be avoided. This function is used by genPreserveCalleeSavedFltRegs
+// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in the prolog if the method contains
+// 128-bit or 256-bit AVX code, to avoid the legacy-SSE-to-AVX transition penalty, which could happen when
+// native code containing legacy SSE calls into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in the
+// epilog if the method contains 256-bit AVX code, to avoid the AVX-to-legacy-SSE transition penalty.
+//
+// Params
+//    check256bitOnly - true to emit VZEROUPPER only if the function contains 256-bit AVX instructions;
+//                      false to emit it if the function contains any AVX instructions (128-bit or 256-bit).
+//
+void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ bool emitVzeroUpper = false;
+ if (check256bitOnly)
+ {
+ emitVzeroUpper = getEmitter()->Contains256bitAVX();
+ }
+ else
+ {
+ emitVzeroUpper = getEmitter()->ContainsAVX();
+ }
+
+ if (emitVzeroUpper)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ instGen(INS_vzeroupper);
+ }
+#endif
+}
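A minimal sketch of the decision the flag encodes, assuming Contains256bitAVX()/ContainsAVX() report whether the emitter has seen 256-bit (respectively, any) AVX instructions so far:

bool ShouldEmitVzeroupper(bool check256bitOnly, bool containsAnyAVX, bool contains256bitAVX)
{
    // Epilog path (check256bitOnly == true): only AVX-256 code needs the upper halves cleared.
    // Prolog path (check256bitOnly == false): any AVX use warrants clearing them on entry.
    return check256bitOnly ? contains256bitAVX : containsAnyAVX;
}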
+
#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
//-----------------------------------------------------------------------------------
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index 667b9d4..0530863 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -1837,6 +1837,15 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTreePtr arrRef = NULL;
int lenOffset = 0;
+ /* Is the array index a constant value? */
+ GenTreePtr index = bndsChk->gtIndex;
+ if (!index->IsCnsIntOrI())
+ {
+ // No, it's not a constant.
+ genCodeForTree(index, RBM_ALLINT);
+ regSet.rsMarkRegUsed(index);
+ }
+
// If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
// Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
// a register.
@@ -1884,14 +1893,8 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
}
}
- /* Is the array index a constant value? */
- GenTreePtr index = bndsChk->gtIndex;
if (!index->IsCnsIntOrI())
{
- // No, it's not a constant.
- genCodeForTree(index, RBM_ALLINT);
- regSet.rsMarkRegUsed(index);
-
// If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
// from its register, get it back in a register.
if (arrRef != NULL)
@@ -1983,6 +1986,11 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
}
// Free the registers that were used.
+ if (!index->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ }
+
if (arrRef != NULL)
{
regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
@@ -1991,11 +1999,6 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
{
regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
}
-
- if (!index->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(index->gtRegNum, index);
- }
}
/*****************************************************************************
@@ -2590,7 +2593,7 @@ regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool loc
if (tree->gtOp.gtOp1)
regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
}
else if (tree->gtOper == GT_ARR_ELEM)
@@ -3039,7 +3042,7 @@ AGAIN:
noway_assert(kind & GTK_SMPOP);
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
genEvalSideEffects(tree->gtOp.gtOp1);
@@ -9689,7 +9692,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP
const genTreeOps oper = tree->OperGet();
const var_types treeType = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
regNumber reg = DUMMY_INIT(REG_CORRUPT);
regMaskTP regs = regSet.rsMaskUsed;
regMaskTP needReg = destReg;
@@ -13394,7 +13397,7 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
int helper;
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
switch (oper)
{
@@ -14538,79 +14541,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
goto DONE;
-#if LONG_ASG_OPS
-
- case GT_ASG_OR:
- insLo = insHi = INS_OR;
- goto ASG_OPR;
- case GT_ASG_XOR:
- insLo = insHi = INS_XOR;
- goto ASG_OPR;
- case GT_ASG_AND:
- insLo = insHi = INS_AND;
- goto ASG_OPR;
- case GT_ASG_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- goto ASG_OPR;
- case GT_ASG_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- goto ASG_OPR;
-
- ASG_OPR:
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 lval = op2->gtLngCon.gtLconVal;
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
-
- /* Optimize some special cases */
-
- doLo = doHi = true;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_ASG_AND:
- if ((int)(lval) == -1)
- doLo = false;
- if ((int)(lval >> 32) == -1)
- doHi = false;
- break;
-
- case GT_ASG_OR:
- case GT_ASG_XOR:
- if (!(lval & 0x00000000FFFFFFFF))
- doLo = false;
- if (!(lval & 0xFFFFFFFF00000000))
- doHi = false;
- break;
- }
-
- if (doLo)
- inst_TT_IV(insLo, op1, (int)(lval), 0);
- if (doHi)
- inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
-
- bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
- if (doLo || doHi)
- tree->gtFlags |= GTF_ZSF_SET;
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- goto DONE_ASSG_REGS;
- }
-
- /* TODO: allow non-const long assignment operators */
-
- noway_assert(!"non-const long asgop NYI");
-
-#endif // LONG_ASG_OPS
-
case GT_IND:
case GT_NULLCHECK:
{
@@ -20725,27 +20655,26 @@ bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
*/
GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
- GenTreePtr relopNode, // The node before the startNode.
+ GenTreePtr relopNode) // The node before the startNode.
// (It should either be NULL or
// a GTF_RELOP_QMARK node.)
- GenTreePtr asgdLclVar)
{
GenTreePtr tree;
VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore then before entering the elseTree.
- bool heapUse_BeforeSplit = fgCurHeapUse;
- bool heapDef_BeforeSplit = fgCurHeapDef;
- bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+ MemoryKindSet memoryUse_BeforeSplit = fgCurMemoryUse;
+ MemoryKindSet memoryDef_BeforeSplit = fgCurMemoryDef;
+ MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
// the USE and DEF sets after
VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
- bool heapUse_AfterThenTree = fgCurHeapUse;
- bool heapDef_AfterThenTree = fgCurHeapDef;
- bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+ MemoryKindSet memoryUse_AfterThenTree = fgCurMemoryUse;
+ MemoryKindSet memoryDef_AfterThenTree = fgCurMemoryDef;
+ MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
// relopNode is either NULL or a GTF_RELOP_QMARK node.
assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
@@ -20772,9 +20701,9 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
- fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
- fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
- fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+ fgCurMemoryDef = fgCurMemoryDef & memoryDef_AfterThenTree;
+ fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
+ fgCurMemoryUse = fgCurMemoryUse | memoryUse_AfterThenTree;
// Return the GT_QMARK node itself so the caller can continue from there.
// NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
@@ -20791,16 +20720,16 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
- heapDef_AfterThenTree = fgCurHeapDef;
- heapHavoc_AfterThenTree = fgCurHeapHavoc;
- heapUse_AfterThenTree = fgCurHeapUse;
+ memoryDef_AfterThenTree = fgCurMemoryDef;
+ memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
+ memoryUse_AfterThenTree = fgCurMemoryUse;
VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
- fgCurHeapDef = heapDef_BeforeSplit;
- fgCurHeapHavoc = heapHavoc_BeforeSplit;
- fgCurHeapUse = heapUse_BeforeSplit;
+ fgCurMemoryDef = memoryDef_BeforeSplit;
+ fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
+ fgCurMemoryUse = memoryUse_BeforeSplit;
break;
@@ -20810,43 +20739,43 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
case GT_LCL_FLD_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(tree->AsLclVarCommon());
break;
case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
// This models Volatile reads as def-then-use of the heap.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
// assignment.
// Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
{
- fgCurHeapUse = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
case GT_IND:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
// This models Volatile reads as def-then-use of the heap.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
+ // as a heap/byref def, when we get to assignment.
// Otherwise, we treat it as a use here.
if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
{
@@ -20855,16 +20784,13 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
else
{
// Defines a local addr
assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
}
}
break;
@@ -20875,25 +20801,23 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
unreached();
break;
- // We'll assume these are use-then-defs of the heap.
+ // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
break;
case GT_MEMORYBARRIER:
- // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
+ // Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
break;
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ // For now, all calls read/write GcHeap/ByrefExposed, writes in their entirety. Might tighten this case
+ // later.
case GT_CALL:
{
GenTreeCall* call = tree->AsCall();
@@ -20909,12 +20833,9 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
}
if (modHeap)
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
@@ -20946,14 +20867,26 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
default:
- // Determine whether it defines a heap location.
+ // Determine what memory kinds it defines.
if (tree->OperIsAssignment() || tree->OperIsBlkOp())
{
GenTreeLclVarCommon* dummyLclVarTree = NULL;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ if (tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
+ {
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
+
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
+ }
+ }
+ else
{
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
+ // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
}
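The MemoryKindSet values used in these hunks behave like small bitmasks that can be OR-ed and AND-ed; a minimal sketch of that idea (the bit assignments and helper below are assumptions for illustration, not the JIT's actual definitions):

typedef unsigned MemoryKindSetSketch;
enum MemoryKindSketch { GcHeapKind = 0, ByrefExposedKind = 1 };

inline MemoryKindSetSketch memoryKindSetSketch(MemoryKindSketch a, MemoryKindSketch b)
{
    return (1u << a) | (1u << b); // one bit per memory kind
}
// e.g. def |= memoryKindSetSketch(GcHeapKind, ByrefExposedKind) marks both kinds as defined,
// while "def & defAfterThen" intersects two such sets, as in the GT_COLON merge above.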
@@ -20967,7 +20900,7 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
// fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
// The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
// node.
- tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+ tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
// We must have been returned here after seeing a GT_QMARK node.
noway_assert(tree->gtOper == GT_QMARK);
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 9713288..329c4a7 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -133,9 +133,8 @@ void CodeGen::genCodeForBBlist()
*/
BasicBlock* block;
- BasicBlock* lblk; /* previous block */
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ for (block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
#ifdef DEBUG
if (compiler->verbose)
@@ -284,7 +283,7 @@ void CodeGen::genCodeForBBlist()
}
#endif
// We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
+ noway_assert(!block->bbPrev->bbFallsThrough());
// We require the block that starts the Cold section to have a label
noway_assert(block->bbEmitCookie);
@@ -602,7 +601,7 @@ void CodeGen::genCodeForBBlist()
break;
case BBJ_CALLFINALLY:
- block = genCallFinally(block, lblk);
+ block = genCallFinally(block);
break;
#if FEATURE_EH_FUNCLETS
@@ -906,6 +905,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
// Load local variable from its home location.
inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
+#elif defined(_TARGET_ARM_)
+ var_types targetType = unspillTree->gtType;
+ instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
+ emitAttr attr = emitTypeSize(targetType);
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
#else
NYI("Unspilling not implemented for this target architecture.");
#endif
@@ -1203,22 +1209,16 @@ void CodeGen::genConsumeRegs(GenTree* tree)
}
#endif // !defined(_TARGET_64BIT_)
- if (tree->isContained())
+ if (tree->isUsedFromSpillTemp())
{
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
+ // spill temps are un-tracked and hence no need to update life
+ }
+ else if (tree->isContained())
+ {
+ if (tree->isIndir())
{
genConsumeAddress(tree->AsIndir()->Addr());
}
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
#ifdef _TARGET_XARCH_
else if (tree->OperGet() == GT_LCL_VAR)
{
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 406ab77..c8a5af6 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -93,10 +93,11 @@ void genSIMDCheck(GenTree* treeNode);
// their size rounded to TARGET_POINTER_SIZE (which is 8 bytes on 64-bit targets) and hence
// Vector3 locals could be treated as TYP_SIMD16 while reading/writing.
void genStoreIndTypeSIMD12(GenTree* treeNode);
-void genStoreLclFldTypeSIMD12(GenTree* treeNode);
void genLoadIndTypeSIMD12(GenTree* treeNode);
+void genStoreLclTypeSIMD12(GenTree* treeNode);
void genLoadLclTypeSIMD12(GenTree* treeNode);
#ifdef _TARGET_X86_
+void genStoreSIMD12ToStack(regNumber operandReg, regNumber tmpReg);
void genPutArgStkSIMD12(GenTree* treeNode);
#endif // _TARGET_X86_
#endif // FEATURE_SIMD
@@ -217,7 +218,7 @@ void genCallInstruction(GenTreePtr call);
void genJmpMethod(GenTreePtr jmp);
-BasicBlock* genCallFinally(BasicBlock* block, BasicBlock* lblk);
+BasicBlock* genCallFinally(BasicBlock* block);
#if FEATURE_EH_FUNCLETS
void genEHCatchRet(BasicBlock* block);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 8e0af48..e893da6 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -226,7 +226,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
#if FEATURE_EH_FUNCLETS
// Generate a call to the finally, like this:
@@ -263,10 +263,14 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
}
else
{
+// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other
+// architectures?
+#ifndef JIT32_GCENCODER
// Because of the way the flowgraph is connected, the liveness info for this one instruction
// after the call is not (can not be) correct in cases where a variable has a last use in the
// handler. So turn off GC reporting for this single instruction.
getEmitter()->emitDisableGC();
+#endif // JIT32_GCENCODER
// Now go to where the finally funclet needs to return to.
if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
@@ -282,7 +286,9 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
}
+#ifndef JIT32_GCENCODER
getEmitter()->emitEnableGC();
+#endif // JIT32_GCENCODER
}
#else // !FEATURE_EH_FUNCLETS
@@ -348,8 +354,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
if (!(block->bbFlags & BBF_RETLESS_CALL))
{
assert(block->isBBCallAlwaysPair());
-
- lblk = block;
block = block->bbNext;
}
return block;
@@ -515,13 +519,13 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
GenTree* regOp = op1;
GenTree* rmOp = op2;
- // Set rmOp to the contained memory operand (if any)
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == REG_RAX)))
+ // Set rmOp to the memory operand (if any)
+ if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == REG_RAX)))
{
regOp = op2;
rmOp = op1;
}
- assert(!regOp->isContained());
+ assert(regOp->isUsedFromReg());
// Setup targetReg when neither of the source operands was a matching register
if (regOp->gtRegNum != REG_RAX)
@@ -569,12 +573,12 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
GenTree* const dividendLo = dividend->gtOp1;
GenTree* const dividendHi = dividend->gtOp2;
- assert(!dividendLo->isContained());
- assert(!dividendHi->isContained());
+ assert(dividendLo->isUsedFromReg());
+ assert(dividendHi->isUsedFromReg());
GenTree* const divisor = node->gtOp2;
assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
- assert(!divisor->gtSkipReloadOrCopy()->isContained());
+ assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg());
assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);
@@ -656,16 +660,16 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = getEmitter();
- // dividend is not contained.
- assert(!dividend->isContained());
+ // dividend is in a register.
+ assert(dividend->isUsedFromReg());
genConsumeOperands(treeNode->AsOp());
if (varTypeIsFloating(targetType))
{
- // divisor is not contained or if contained is a memory op.
+ // Check that divisor is a valid operand.
// Note that a reg optional operand is a treated as a memory op
// if no register is allocated to it.
- assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
+ assert(divisor->isUsedFromReg() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
divisor->IsRegOptional());
// Floating point div/rem operation
@@ -675,7 +679,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
}
- else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
+ else if (divisor->isUsedFromReg() && divisor->gtRegNum == targetReg)
{
// It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
// because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
@@ -773,8 +777,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
GenTreePtr op1 = treeNode->gtGetOp1();
GenTreePtr op2 = treeNode->gtGetOp2();
- // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
- if (op1->isContained())
+ // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed"
+ if (!op1->isUsedFromReg())
{
assert(treeNode->OperIsCommutative());
assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
@@ -788,8 +792,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
// The arithmetic node must be sitting in a register (since it's not contained)
noway_assert(targetReg != REG_NA);
- regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
- regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
+ regNumber op1reg = op1->isUsedFromReg() ? op1->gtRegNum : REG_NA;
+ regNumber op2reg = op2->isUsedFromReg() ? op2->gtRegNum : REG_NA;
GenTreePtr dst;
GenTreePtr src;
@@ -814,7 +818,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
}
// now we know there are 3 different operands so attempt to use LEA
else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags())
+ && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags())
{
if (op2->isContainedIntOrIImmed())
{
@@ -936,7 +940,7 @@ void CodeGen::genStructReturn(GenTreePtr treeNode)
{
// Right now the only enregistrable structs supported are SIMD vector types.
assert(varTypeIsSIMD(op1));
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
// This is a case of operand is in a single reg and needs to be
// returned in multiple ABI return registers.
@@ -974,7 +978,7 @@ void CodeGen::genStructReturn(GenTreePtr treeNode)
}
else
{
- assert(op1->isContained());
+ assert(op1->isUsedFromMemory());
// Copy var on stack into ABI return registers
int offset = 0;
@@ -1328,7 +1332,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
+ assert(operand->isUsedFromReg());
regNumber operandReg = genConsumeReg(operand);
if (operandReg != targetReg)
@@ -1374,7 +1378,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_RSH_LO:
// TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
// need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
- // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+ // targetReg if sourceHi is a memory operand). Similarly for GT_RSH_LO, sourceLo could be marked as
// contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
genCodeForShiftLong(treeNode);
break;
@@ -1423,7 +1427,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
-
#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
// Loading of TYP_SIMD12 (i.e. Vector3) variable
if (treeNode->TypeGet() == TYP_SIMD12)
@@ -1486,10 +1489,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// storing of TYP_SIMD12 (i.e. Vector3) field
if (treeNode->TypeGet() == TYP_SIMD12)
{
- genStoreLclFldTypeSIMD12(treeNode);
+ genStoreLclTypeSIMD12(treeNode);
break;
}
-#endif
+#endif // FEATURE_SIMD
+
GenTreePtr op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);
emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
@@ -1526,6 +1530,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#endif // !defined(_TARGET_64BIT_)
#ifdef FEATURE_SIMD
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclTypeSIMD12(treeNode);
+ break;
+ }
+
if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
{
// This is only possible for a zero-init.
@@ -1547,25 +1558,24 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
else
{
- bool containedOp1 = op1->isContained();
// Look for the case where we have a constant zero which we've marked for reuse,
// but which isn't actually in the register we want. In that case, it's better to create
// zero in the target register, because an xor is smaller than a copy. Note that we could
// potentially handle this in the register allocator, but we can't always catch it there
// because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
+ if (op1->isUsedFromReg() && (op1->gtRegNum != treeNode->gtRegNum) &&
(op1->IsIntegralConst(0) || op1->IsFPZero()))
{
op1->gtRegNum = REG_NA;
op1->ResetReuseRegVal();
- containedOp1 = true;
}
- if (containedOp1)
+ if (!op1->isUsedFromReg())
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
+ // Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support an operand used from
+ // memory. This is a bit tricky because we have to decide it can be used from memory before
+ // register allocation,
// and this would be a case where, once that's done, we need to mark that node as always
// requiring a register - which we always assume now anyway, but once we "optimize" that
// we'll have to take cases like this into account.
@@ -1682,7 +1692,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// CQ: When possible use LEA for mul by imm 3, 5 or 9
ssize_t imm = immOp->AsIntConCommon()->IconValue();
- if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9)))
{
// We will use the LEA instruction to perform this multiply
// Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
@@ -1712,15 +1722,15 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
ins = genGetInsForOper(GT_MUL, targetType);
}
- // Set rmOp to the contain memory operand (if any)
+ // Set rmOp to the memory operand (if any)
// or set regOp to the op2 when it has the matching target register for our multiply op
//
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
+ if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == mulTargetReg)))
{
regOp = op2;
rmOp = op1;
}
- assert(!regOp->isContained());
+ assert(regOp->isUsedFromReg());
// Setup targetReg when neither of the source operands was a matching register
if (regOp->gtRegNum != mulTargetReg)
@@ -1781,6 +1791,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LE:
case GT_GE:
case GT_GT:
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
{
// TODO-XArch-CQ: Check if we can use the currently set flags.
// TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
@@ -2089,7 +2101,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_NULLCHECK:
{
- assert(!treeNode->gtOp.gtOp1->isContained());
+ assert(treeNode->gtOp.gtOp1->isUsedFromReg());
regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
}
@@ -2180,7 +2192,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#if !defined(_TARGET_64BIT_)
case GT_LONG:
- assert(!treeNode->isContained());
+ assert(treeNode->isUsedFromReg());
genConsumeRegs(treeNode);
break;
#endif
@@ -2631,16 +2643,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);
-#if defined(_TARGET_AMD64_)
- // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
-#elif defined(_TARGET_X86_)
- // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
- inst_IV(INS_push_hide, 0); // --- push 4-byte 0
-#endif // _TARGET_X86_
+ static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
+ unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ inst_IV(INS_push_hide, 0); // --- push REGSIZE_BYTES bytes of 0
+ }
+ // Note that the stack must always be aligned to STACK_ALIGN bytes
// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
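A worked instance of the new push count, using the alignment values the deleted arch-specific code stated (16-byte STACK_ALIGN with 8-byte registers on AMD64, 4-byte STACK_ALIGN with 4-byte registers on x86):

constexpr unsigned PushCount(unsigned stackAlign, unsigned regSizeBytes)
{
    return stackAlign / regSizeBytes; // (STACK_ALIGN % REGSIZE_BYTES) == 0 is asserted above
}
static_assert(PushCount(16, 8) == 2, "AMD64: two 8-byte zero pushes per iteration");
static_assert(PushCount(4, 4) == 1, "x86: one 4-byte zero push per iteration");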
@@ -2841,8 +2851,8 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
}
#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg());
#ifdef _TARGET_AMD64_
assert(size != 0);
#endif
@@ -2878,8 +2888,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
initVal = initVal->gtGetOp1();
}
- assert(!dstAddr->isContained());
- assert(!initVal->isContained() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
assert(size != 0);
assert(size <= INITBLK_UNROLL_LIMIT);
assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
@@ -2979,8 +2989,8 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
initVal = initVal->gtGetOp1();
}
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg());
if (blockSize != 0)
{
@@ -3064,7 +3074,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- if (!srcAddr->isContained())
+ if (srcAddr->isUsedFromReg())
{
genConsumeReg(srcAddr);
}
@@ -3086,7 +3096,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
srcAddr = source;
}
- if (!dstAddr->isContained())
+ if (dstAddr->isUsedFromReg())
{
genConsumeReg(dstAddr);
}
@@ -3171,7 +3181,7 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
GenTreePtr srcAddr = nullptr;
#ifdef DEBUG
- assert(!dstAddr->isContained());
+ assert(dstAddr->isUsedFromReg());
assert(source->isContained());
#ifdef _TARGET_X86_
@@ -3352,7 +3362,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
assert(src->gtOper == GT_OBJ);
- if (!src->gtOp.gtOp1->isContained())
+ if (src->gtOp.gtOp1->isUsedFromReg())
{
genConsumeReg(src->gtOp.gtOp1);
}
@@ -3544,7 +3554,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
+ assert(srcAddr->isUsedFromReg());
}
else
{
@@ -3557,7 +3567,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
#ifdef DEBUG
bool isRepMovspUsed = false;
- assert(!dstAddr->isContained());
+ assert(dstAddr->isUsedFromReg());
// If the GenTree node has data about GC pointers, this means we're dealing
// with CpObj, so this requires special logic.
@@ -3720,7 +3730,7 @@ void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
+ assert(srcAddr->isUsedFromReg());
}
else
{
@@ -3863,16 +3873,16 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
- GenTreePtr arrLen = bndsChk->gtArrLen;
GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrLen = bndsChk->gtArrLen;
GenTreePtr arrRef = nullptr;
int lenOffset = 0;
GenTree * src1, *src2;
emitJumpKind jmpKind;
- genConsumeRegs(arrLen);
genConsumeRegs(arrIndex);
+ genConsumeRegs(arrLen);
if (arrIndex->isContainedIntOrIImmed())
{
@@ -3899,7 +3909,7 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
// cmp reg, [mem] (if arrLen is a memory op)
//
// That is only one of arrIndex or arrLen can be a memory op.
- assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+ assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory());
src1 = arrIndex;
src2 = arrLen;
@@ -4211,7 +4221,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
{
// Only the non-RMW case here.
assert(tree->OperIsShiftOrRotate());
- assert(!tree->gtOp.gtOp1->isContained());
+ assert(tree->gtOp.gtOp1->isUsedFromReg());
assert(tree->gtRegNum != REG_NA);
genConsumeOperands(tree->AsOp());
@@ -4277,8 +4287,8 @@ void CodeGen::genCodeForShiftLong(GenTreePtr tree)
GenTree* operand = tree->gtOp.gtOp1;
assert(operand->OperGet() == GT_LONG);
- assert(!operand->gtOp.gtOp1->isContained());
- assert(!operand->gtOp.gtOp2->isContained());
+ assert(operand->gtOp.gtOp1->isUsedFromReg());
+ assert(operand->gtOp.gtOp2->isUsedFromReg());
GenTree* operandLo = operand->gtGetOp1();
GenTree* operandHi = operand->gtGetOp2();
@@ -4334,7 +4344,7 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
assert(data->OperIsShiftOrRotate());
// This function only handles the RMW case.
- assert(data->gtOp.gtOp1->isContained());
+ assert(data->gtOp.gtOp1->isUsedFromMemory());
assert(data->gtOp.gtOp1->isIndir());
assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
assert(data->gtRegNum == REG_NA);
@@ -4580,7 +4590,7 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(storeInd->IsRMWDstOp1());
rmwSrc = data->gtGetOp1();
rmwDst = data->gtGetOp1();
- assert(rmwSrc->isContained());
+ assert(rmwSrc->isUsedFromMemory());
}
assert(rmwSrc != nullptr);
@@ -4616,8 +4626,7 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(rmwSrc == data->gtGetOp2());
genCodeForShiftRMW(storeInd);
}
- else if (!compiler->opts.compDbgCode && data->OperGet() == GT_ADD &&
- (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
+ else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
{
// Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
//
@@ -4858,11 +4867,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
#if defined(_TARGET_X86_)
- assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
- if (arg->OperGet() == GT_LONG)
- {
- assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
- }
if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
{
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
@@ -4886,9 +4890,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
- {
#endif // FEATURE_PUT_STRUCT_ARG_STK
+ {
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
@@ -5001,6 +5005,20 @@ void CodeGen::genCallInstruction(GenTreePtr node)
#endif // defined(_TARGET_X86_)
+#ifdef FEATURE_AVX_SUPPORT
+ // When this is a PInvoke call to a user function, we issue a VZEROUPPER here if the function
+ // contains 256-bit AVX instructions. This avoids the AVX-256-to-legacy-SSE transition penalty,
+ // on the assumption that the native callee contains legacy SSE instructions.
+ // To limit the code size impact, we only issue VZEROUPPER before the PInvoke call, not after it:
+ // the legacy-SSE-to-AVX transition penalty only arises after a preceding AVX-256-to-legacy-SSE
+ // transition, which the VZEROUPPER issued before the call already prevents.
+ if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ instGen(INS_vzeroupper);
+ }
+#endif
+
if (target != nullptr)
{
#ifdef _TARGET_X86_
@@ -5020,7 +5038,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
assert(target->OperGet() == GT_IND);
GenTree* addr = target->AsIndir()->Addr();
- assert(!addr->isContained());
+ assert(addr->isUsedFromReg());
genConsumeReg(addr);
genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
@@ -5113,6 +5131,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}
+#if defined(UNIX_X86_ABI)
+ // Put back the stack pointer if there was any padding for stack alignment
+ unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
+ if (padStackAlign != 0)
+ {
+ inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
+ }
+#endif // UNIX_X86_ABI
+
// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
@@ -6064,7 +6091,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
GenTreePtr op2 = tree->gtOp2;
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- regNumber targetReg = treeNode->gtRegNum;
+ regNumber targetReg = tree->gtRegNum;
// Case of op1 == 0 or op1 != 0:
// Optimize generation of 'test' instruction if op1 sets flags.
@@ -6081,7 +6108,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
assert(realOp1->gtSetZSFlags());
// Must be (in)equality against zero.
- assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+ assert(tree->OperIs(GT_EQ, GT_NE));
assert(op2->IsIntegralConst(0));
assert(op2->isContained());
@@ -6105,7 +6132,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
// If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
// then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
// would set or clear Zero flag.
- if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE))
+ if ((targetReg == REG_NA) && tree->OperIs(GT_EQ, GT_NE))
{
// Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
@@ -6124,128 +6151,67 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
genConsumeOperands(tree);
- instruction ins;
- emitAttr cmpAttr;
-
// TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm.
// https://github.com/dotnet/coreclr/issues/7270
assert(!op1->isContainedIntOrIImmed()); // We no longer support
assert(!varTypeIsFloating(op2Type));
-#ifdef _TARGET_X86_
- assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
-#endif // _TARGET_X86_
-
- // By default we use an int32 sized cmp instruction
- //
- ins = INS_cmp;
- var_types cmpType = TYP_INT;
-
- // In the if/then/else statement below we may change the
- // 'cmpType' and/or 'ins' to generate a smaller instruction
+ instruction ins;
- // Are we comparing two values that are the same size?
- //
- if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE))
{
- if (op1Type == op2Type)
- {
- // If both types are exactly the same we can use that type
- cmpType = op1Type;
- }
- else if (genTypeSize(op1Type) == 8)
- {
- // If we have two different int64 types we need to use a long compare
- cmpType = TYP_LONG;
- }
-
- cmpAttr = emitTypeSize(cmpType);
+ ins = INS_test;
}
- else // Here we know that (op1Type != op2Type)
+ else if (op1->isUsedFromReg() && op2->IsIntegralConst(0))
{
- // Do we have a short compare against a constant in op2?
- //
- // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small
- // compare immediate we labeled this compare with a GTF_RELOP_SMALL
- // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
- //
- if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
- {
- assert(varTypeIsSmall(op1Type));
- cmpType = op1Type;
- }
-#ifdef _TARGET_AMD64_
- else // compare two different sized operands
- {
- // For this case we don't want any memory operands, only registers or immediates
- //
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
+ // We're comparing a register to 0 so we can generate "test reg1, reg1"
+ // instead of the longer "cmp reg1, 0"
+ ins = INS_test;
+ op2 = op1;
+ }
+ else
+ {
+ ins = INS_cmp;
+ }
- // Check for the case where one operand is an int64 type
- // Lower should have placed 32-bit operand in a register
- // for signed comparisons we will sign extend the 32-bit value in place.
- //
- bool op1Is64Bit = (genTypeSize(op1Type) == 8);
- bool op2Is64Bit = (genTypeSize(op2Type) == 8);
- if (op1Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
- {
- assert(op2->gtRegNum != REG_NA);
- inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
- }
- }
- else if (op2Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
- {
- assert(op1->gtRegNum != REG_NA);
- }
- }
- }
-#endif // _TARGET_AMD64_
+ var_types type;
- cmpAttr = emitTypeSize(cmpType);
+ if (op1Type == op2Type)
+ {
+ type = op1Type;
}
-
- // See if we can generate a "test" instruction instead of a "cmp".
- // For this to generate the correct conditional branch we must have
- // a compare against zero.
- //
- if (op2->IsIntegralConst(0))
+ else if (genTypeSize(op1Type) == genTypeSize(op2Type))
{
- if (op1->isContained())
- {
- // op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
- //
- if ((op1->OperGet() == GT_AND) && op1->gtGetOp2()->isContainedIntOrIImmed() &&
- ((tree->OperGet() == GT_EQ) || (tree->OperGet() == GT_NE)))
- {
- ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
- op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
- op1 = op1->gtOp.gtOp1; // overwrite op1
-
- if (op1->isContainedMemoryOp())
- {
- // use the size andOp1 if it is a contained memoryop.
- cmpAttr = emitTypeSize(op1->TypeGet());
- }
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
- else // op1 is not contained thus it must be in a register
- {
- ins = INS_test;
- op2 = op1; // we will generate "test reg1,reg1"
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
+ // If the types are different but have the same size then we'll use TYP_INT or TYP_LONG.
+ // This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened
+ // and compared as int. We should not get long type mixes here but handle that as well
+ // just in case.
+ type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT;
}
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+ else
+ {
+ // If the types are different simply use TYP_INT. This deals with small type/int type
+ // mixes (e.g. byte/short, ubyte/int) that need to be widened and compared as int.
+ // Lowering is expected to handle any mixes that involve long types (e.g. int/long).
+ type = TYP_INT;
+ }
+
+ // The common type cannot be larger than the machine word size
+ assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL));
+ // The common type cannot be smaller than any of the operand types, we're probably mixing int/long
+ assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type)));
+ // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned
+ assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type));
+ // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons
+ assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0));
+ // If op1 is smaller, then it cannot be in memory; we're probably missing a cast
+ assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory());
+ // If op2 is smaller, then it cannot be in memory; we're probably missing a cast
+ assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory());
+ // If op2 is a constant then it should fit in the common type
+ assert(!op2->IsCnsIntOrI() || genTypeCanRepresentValue(type, op2->AsIntCon()->IconValue()));
+
+ getEmitter()->emitInsBinary(ins, emitTypeSize(type), op1, op2);
// Are we evaluating this into a register?
if (targetReg != REG_NA)
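
To make the rewritten type-selection rule above easy to follow, here is a minimal standalone sketch of the same decision, using plain byte sizes instead of the JIT's var_types and emitter attributes (the function name is invented for illustration):

#include <cassert>

// Sketch only: operand sizes are in bytes (1/2/4/8). Small-type widening and
// int/long mixes are assumed to have been handled by Lowering, as the asserts
// in genCompareInt require.
int pickCompareSizeBytes(int op1Size, int op2Size, bool sameType)
{
    if (sameType)
    {
        return op1Size;                // identical types: compare at that size
    }
    if (op1Size == op2Size)
    {
        return (op1Size == 8) ? 8 : 4; // equal sizes, different types: compare as int or long
    }
    assert((op1Size <= 4) && (op2Size <= 4)); // int/long mixes belong to Lowering
    return 4;                          // small/int mixes widen and compare as int
}
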
@@ -6810,7 +6776,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
// If not contained, must be a valid float reg.
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidFloatReg(op1->gtRegNum));
}
@@ -6821,7 +6787,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && (!op1->isContained() && (targetReg == op1->gtRegNum)))
+ if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->gtRegNum)))
{
// source and destinations types are the same and also reside in the same register.
// we just need to consume and produce the reg in this case.
@@ -6861,7 +6827,7 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidIntReg(op1->gtRegNum));
}
@@ -6936,7 +6902,7 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
// should also be updated for consistent conversion result.
assert(dstType == TYP_DOUBLE);
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
// Set the flags without modifying op1.
// test op1Reg, op1Reg
@@ -6995,7 +6961,7 @@ void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidFloatReg(op1->gtRegNum));
}
@@ -7374,7 +7340,7 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
// Move operand into targetReg only if the reg reserved for
// internal purpose is not the same as targetReg.
GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
regNumber operandReg = genConsumeReg(op1);
if (tmpReg != targetReg)
{
@@ -7497,7 +7463,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
#ifdef _TARGET_X86_
//---------------------------------------------------------------------
-// adjustStackForPutArgStk:
+// genAdjustStackForPutArgStk:
// adjust the stack pointer for a putArgStk node if necessary.
//
// Arguments:
@@ -7505,6 +7471,12 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
//
// Returns: true if the stack pointer was adjusted; false otherwise.
//
+// Notes:
+// Sets `m_pushStkArg` to true if the stack arg needs to be pushed,
+// false if the stack arg needs to be stored at the current stack
+// pointer address. This is exactly the opposite of the return value
+// of this function.
+//
bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
#ifdef FEATURE_SIMD
@@ -7562,11 +7534,10 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
}
//---------------------------------------------------------------------
-// genPutArgStkFieldList - generate code for passing an arg on the stack.
+// genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack.
//
// Arguments
-// treeNode - the GT_PUTARG_STK node
-// targetType - the type of the treeNode
+// treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST
//
// Return value:
// None
@@ -7578,24 +7549,36 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// Set m_pushStkArg and pre-adjust the stack if necessary.
const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+
// For now, we only support the "push" case; we will push a full slot for the first field of each slot
// within the struct.
assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
- // If we have pre-adjusted the stack and are simply storing the fields in order) set the offset to 0.
+ // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
// (Note that this mode is not currently being used.)
// If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
// in reverse order, so we start with the current field offset at the size of the struct arg (which must be
// a multiple of the target pointer size).
unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
unsigned prevFieldOffset = currentOffset;
- regNumber tmpReg = REG_NA;
+ regNumber intTmpReg = REG_NA;
+ regNumber simdTmpReg = REG_NA;
if (putArgStk->gtRsvdRegs != RBM_NONE)
{
- assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
- tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
- assert(genIsValidIntReg(tmpReg));
+ regMaskTP rsvdRegs = putArgStk->gtRsvdRegs;
+ if ((rsvdRegs & RBM_ALLINT) != 0)
+ {
+ intTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLINT);
+ assert(genIsValidIntReg(intTmpReg));
+ }
+ if ((rsvdRegs & RBM_ALLFLOAT) != 0)
+ {
+ simdTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(simdTmpReg));
+ }
+ assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1));
}
+
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
GenTree* const fieldNode = current->Current();
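
The reserved-register change above now allows up to two temporaries, one integer and one SIMD, to be carried in a single mask. A standalone sketch of that split, with simplified bit masks in place of regMaskTP and an invented lowest-set-bit helper standing in for genRegNumFromMask:

#include <cstdint>

// Sketch only: register "numbers" are bit indices; -1 means no temp was reserved.
int lowestSetBit(uint64_t mask)
{
    int index = 0;
    while ((mask & 1) == 0)
    {
        mask >>= 1;
        index++;
    }
    return index;
}

void splitReservedRegs(uint64_t rsvdMask, uint64_t allIntMask, uint64_t allFloatMask,
                       int* intTmpReg, int* simdTmpReg)
{
    *intTmpReg  = ((rsvdMask & allIntMask) != 0) ? lowestSetBit(rsvdMask & allIntMask) : -1;
    *simdTmpReg = ((rsvdMask & allFloatMask) != 0) ? lowestSetBit(rsvdMask & allFloatMask) : -1;
}
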
@@ -7612,7 +7595,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// assigned a register, and which is therefore contained.
// Unlike genConsumeReg(), it handles the case where no registers are being consumed.
genConsumeRegs(fieldNode);
- regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+ regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->gtRegNum;
// If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
//
@@ -7623,7 +7606,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// able to detect stores into the outgoing argument area of the stack on x86.
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
int adjustment = roundUp(currentOffset - fieldOffset, 4);
- if (fieldIsSlot)
+ if (fieldIsSlot && !varTypeIsSIMD(fieldType))
{
fieldType = genActualType(fieldType);
unsigned pushSize = genTypeSize(fieldType);
@@ -7641,12 +7624,13 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
else
{
m_pushStkArg = false;
+
// We always "push" floating point fields (i.e. they are full slot values that don't
// require special handling).
- assert(varTypeIsIntegralOrI(fieldNode));
+ assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode));
+
// If we can't push this field, it needs to be in a register so that we can store
// it to the stack location.
- assert(tmpReg != REG_NA);
if (adjustment != 0)
{
// This moves the stack pointer to fieldOffset.
@@ -7658,15 +7642,16 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
// Does it need to be in a byte register?
- // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If so, we'll use intTmpReg, which must have been allocated as a byte register.
// If it's already in a register, but not a byteable one, then move it.
if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
{
- noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ assert(intTmpReg != REG_NA);
+ noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0);
if (argReg != REG_NA)
{
- inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
- argReg = tmpReg;
+ inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType);
+ argReg = intTmpReg;
}
}
}
@@ -7675,8 +7660,9 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
if (m_pushStkArg)
{
- if (fieldNode->isContainedSpillTemp())
+ if (fieldNode->isUsedFromSpillTemp())
{
+ assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD?
assert(fieldNode->IsRegOptional());
TempDsc* tmp = getSpillTempDsc(fieldNode);
getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
@@ -7709,25 +7695,35 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
else
{
- // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack.
assert(varTypeIsIntegralOrI(fieldNode));
switch (fieldNode->OperGet())
{
case GT_LCL_VAR:
- inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ inst_RV_TT(INS_mov, intTmpReg, fieldNode);
break;
case GT_CNS_INT:
- genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
break;
default:
unreached();
}
- genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset);
}
}
else
{
- genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ if (fieldType == TYP_SIMD12)
+ {
+ assert(genIsValidFloatReg(simdTmpReg));
+ genStoreSIMD12ToStack(argReg, simdTmpReg);
+ }
+ else
+#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ }
if (m_pushStkArg)
{
// We always push a slot-rounded size
@@ -7762,13 +7758,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
-#ifdef FEATURE_SIMD
- if (targetType == TYP_SIMD12)
+#if defined(UNIX_X86_ABI)
+ // For each call, the first stack argument has the padding for alignment;
+ // if this value is not zero, use it to adjust ESP
+ unsigned argPadding = putArgStk->getArgPadding();
+ if (argPadding != 0)
{
- genPutArgStkSIMD12(putArgStk);
- return;
+ inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
-#endif // FEATURE_SIMD
+#endif
if (varTypeIsStruct(targetType))
{
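
The UNIX_X86_ABI block above consumes a padding value computed elsewhere (fgArgInfo::ArgsAlignPadding, not shown in this diff). Assuming the System V i386 requirement of 16-byte stack alignment at the call site and 4-byte stack slots, a sketch of how such a padding slot count could be derived looks like this; treat it as an illustration of the idea, not the JIT's exact computation:

// Sketch only, under the stated alignment assumptions.
unsigned paddingSlotsFor(unsigned pushedArgBytes)
{
    const unsigned alignment = 16; // call-site stack alignment, in bytes
    const unsigned slotSize  = 4;  // x86 stack slot size, in bytes
    unsigned rem = pushedArgBytes % alignment;
    return (rem == 0) ? 0 : (alignment - rem) / slotSize;
}
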
@@ -7782,9 +7780,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr data = putArgStk->gtOp1;
- // On a 32-bit target, all of the long arguments have been decomposed into
- // a separate putarg_stk for each of the upper and lower halves.
- noway_assert(targetType != TYP_LONG);
+ // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LIST,
+ // and the type of the putArgStk is TYP_VOID.
+ assert(targetType != TYP_LONG);
const unsigned argSize = putArgStk->getArgSize();
assert((argSize % TARGET_POINTER_SIZE) == 0);
@@ -7808,7 +7806,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
else
{
// We should not see any contained nodes that are not immediates.
- assert(!data->isContained());
+ assert(data->isUsedFromReg());
genConsumeReg(data);
genPushReg(targetType, data->gtRegNum);
}
@@ -7844,13 +7842,14 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr data = putArgStk->gtOp1;
- if (data->isContained())
+ if (data->isContainedIntOrIImmed())
{
getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
(int)data->AsIntConCommon()->IconValue());
}
else
{
+ assert(data->isUsedFromReg());
genConsumeReg(data);
getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
argOffset);
@@ -7996,6 +7995,14 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
var_types targetType = putArgStk->TypeGet();
+#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ if (targetType == TYP_SIMD12)
+ {
+ genPutArgStkSIMD12(putArgStk);
+ return;
+ }
+#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+
if (varTypeIsSIMD(targetType))
{
regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
@@ -8078,7 +8085,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}
- const unsigned offset = i * 4;
+ const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
@@ -8087,7 +8094,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
- genStackLevel += 4;
+ genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)
@@ -8175,11 +8182,11 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
*
* Create and record GC Info for the function.
*/
-#ifdef _TARGET_AMD64_
+#ifndef JIT32_GCENCODER
void
-#else // !_TARGET_AMD64_
+#else // !JIT32_GCENCODER
void*
-#endif // !_TARGET_AMD64_
+#endif // !JIT32_GCENCODER
CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
{
#ifdef JIT32_GCENCODER
@@ -8381,6 +8388,14 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
}
+ if (compiler->opts.IsReversePInvoke())
+ {
+ unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
+ assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount);
+ LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber];
+ gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.lvStkOffs);
+ }
+
gcInfoEncoder->Build();
// GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
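
The reverse P/Invoke hunk above boils down to conditionally reporting one stack offset to the GC info encoder. A sketch of that shape with an invented stand-in for the encoder (the real pieces are compiler->opts.IsReversePInvoke(), lvaReversePInvokeFrameVar, and SetReversePInvokeFrameSlot, exactly as the diff shows):

// Sketch only: 'Encoder' is a hypothetical stand-in, not the GcInfoEncoder API.
struct Encoder
{
    int reversePInvokeFrameSlot = 0;
    void setReversePInvokeFrameSlot(int stackOffset) { reversePInvokeFrameSlot = stackOffset; }
};

void reportReversePInvokeFrame(Encoder& encoder, bool isReversePInvoke, int frameVarStackOffset)
{
    if (isReversePInvoke)
    {
        encoder.setReversePInvokeFrameSlot(frameVarStackOffset);
    }
}
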
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 114847c..01c7f8d 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -1635,18 +1635,16 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbVarUse));
fprintf(fout, "Offset / size of bbVarDef = %3u / %3u\n", offsetof(BasicBlock, bbVarDef),
sizeof(bbDummy->bbVarDef));
- fprintf(fout, "Offset / size of bbVarTmp = %3u / %3u\n", offsetof(BasicBlock, bbVarTmp),
- sizeof(bbDummy->bbVarTmp));
fprintf(fout, "Offset / size of bbLiveIn = %3u / %3u\n", offsetof(BasicBlock, bbLiveIn),
sizeof(bbDummy->bbLiveIn));
fprintf(fout, "Offset / size of bbLiveOut = %3u / %3u\n", offsetof(BasicBlock, bbLiveOut),
sizeof(bbDummy->bbLiveOut));
- fprintf(fout, "Offset / size of bbHeapSsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaPhiFunc),
- sizeof(bbDummy->bbHeapSsaPhiFunc));
- fprintf(fout, "Offset / size of bbHeapSsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumIn),
- sizeof(bbDummy->bbHeapSsaNumIn));
- fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
- sizeof(bbDummy->bbHeapSsaNumOut));
+ fprintf(fout, "Offset / size of bbMemorySsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaPhiFunc),
+ sizeof(bbDummy->bbMemorySsaPhiFunc));
+ fprintf(fout, "Offset / size of bbMemorySsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaNumIn),
+ sizeof(bbDummy->bbMemorySsaNumIn));
+ fprintf(fout, "Offset / size of bbMemorySsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaNumOut),
+ sizeof(bbDummy->bbMemorySsaNumOut));
fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
sizeof(bbDummy->bbScope));
fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
@@ -1788,9 +1786,9 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
impSpillCliquePredMembers = ExpandArray<BYTE>(getAllocator());
impSpillCliqueSuccMembers = ExpandArray<BYTE>(getAllocator());
- memset(&lvHeapPerSsaData, 0, sizeof(PerSsaArray));
- lvHeapPerSsaData.Init(getAllocator());
- lvHeapNumSsaNames = 0;
+ memset(&lvMemoryPerSsaData, 0, sizeof(PerSsaArray));
+ lvMemoryPerSsaData.Init(getAllocator());
+ lvMemoryNumSsaNames = 0;
//
// Initialize all the per-method statistics gathering data structures.
@@ -1871,8 +1869,11 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
m_fieldSeqStore = nullptr;
m_zeroOffsetFieldMap = nullptr;
m_arrayInfoMap = nullptr;
- m_heapSsaMap = nullptr;
m_refAnyClass = nullptr;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ m_memorySsaMap[memoryKind] = nullptr;
+ }
#ifdef DEBUG
if (!compIsForInlining())
@@ -2312,6 +2313,9 @@ void Compiler::compSetProcessor()
if (opts.compCanUseAVX)
{
codeGen->getEmitter()->SetUseAVX(true);
+ // Assume at first that each JITted method contains no AVX instructions
+ codeGen->getEmitter()->SetContainsAVX(false);
+ codeGen->getEmitter()->SetContains256bitAVX(false);
}
else
#endif // FEATURE_AVX_SUPPORT
@@ -3024,6 +3028,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
#ifdef FEATURE_SIMD
// Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
+ setUsesSIMDTypes(false);
#endif // FEATURE_SIMD
if (compIsForInlining() || compIsForImportOnly())
@@ -3296,8 +3301,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
}
#endif
- opts.compMustInlinePInvokeCalli = jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB);
-
opts.compScopeInfo = opts.compDbgInfo;
#ifdef LATE_DISASM
@@ -4194,11 +4197,17 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
assert(!fgComputePredsDone);
if (fgCheapPredsValid)
{
- // Remove cheap predecessors before inlining; allowing the cheap predecessor lists to be inserted
- // with inlined blocks causes problems.
+ // Remove cheap predecessors before inlining and fat call transformation;
+ // allowing the cheap predecessor lists to be inserted causes problems
+ // with splitting existing blocks.
fgRemovePreds();
}
+ if (IsTargetAbi(CORINFO_CORERT_ABI) && doesMethodHaveFatPointer())
+ {
+ fgTransformFatCalli();
+ }
+
EndPhase(PHASE_IMPORTATION);
if (compIsForInlining())
@@ -4598,6 +4607,10 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
assert(REG_OPT_RSVD != REG_FP);
}
+ // compRsvdRegCheck() has read out the FramePointerUsed property, but doLinearScan()
+ // tries to overwrite it later. This violates the PhasedVar rule and triggers an assertion.
+ // TODO-ARM-Bug?: What is the proper way to handle this situation?
+ codeGen->resetFramePointerUsedWritePhase();
#ifdef DEBUG
//
@@ -4718,21 +4731,6 @@ void Compiler::ResetOptAnnotations()
tree->ClearVN();
tree->ClearAssertion();
tree->gtCSEnum = NO_CSE;
-
- // Clear any *_ASG_LHS flags -- these are set during SSA construction,
- // and the heap live-in calculation depends on them being unset coming
- // into SSA construction (without clearing them, a block that has a
- // heap def via one of these before any heap use is treated as not having
- // an upwards-exposed heap use, even though subsequent heap uses may not
- // be killed by the store; this seems to be a bug, worked around here).
- if (tree->OperIsIndir())
- {
- tree->gtFlags &= ~GTF_IND_ASG_LHS;
- }
- else if (tree->OperGet() == GT_CLS_VAR)
- {
- tree->gtFlags &= ~GTF_CLS_VAR_ASG_LHS;
- }
}
}
}
@@ -6708,16 +6706,7 @@ Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
if (arg->gtFlags & GTF_LATE_ARG)
{
// Find the corresponding late arg.
- GenTreePtr lateArg = nullptr;
- for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
- {
- if (call->fgArgInfo->ArgTable()[j]->argNum == i)
- {
- lateArg = call->fgArgInfo->ArgTable()[j]->node;
- break;
- }
- }
- assert(lateArg != nullptr);
+ GenTreePtr lateArg = call->fgArgInfo->GetLateArg(i);
if (GetNodeTestData()->Lookup(lateArg, &tlAndN))
{
reachable->Set(lateArg, 0);
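
The hunk above replaces an open-coded scan of the argument table with the new fgArgInfo::GetLateArg helper. A standalone sketch of that lookup over a simplified table (std::vector and void* stand in for the real fgArgTabEntry table and GenTree*):

#include <vector>

struct ArgEntry
{
    unsigned argNum;
    void*    node; // stands in for the late-arg GenTree*
};

// Returns the late arg node recorded for the given argument position, or nullptr if none.
void* getLateArg(const std::vector<ArgEntry>& argTable, unsigned argIndex)
{
    for (const ArgEntry& entry : argTable)
    {
        if (entry.argNum == argIndex)
        {
            return entry.node;
        }
    }
    return nullptr;
}
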
@@ -6805,14 +6794,14 @@ void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
assert(to->gtOp.gtOp1 == nullptr);
}
- if (from->gtGetOp2() != nullptr)
+ if (from->gtGetOp2IfPresent() != nullptr)
{
- assert(to->gtGetOp2() != nullptr);
+ assert(to->gtGetOp2IfPresent() != nullptr);
CopyTestDataToCloneTree(from->gtGetOp2(), to->gtGetOp2());
}
else
{
- assert(to->gtGetOp2() == nullptr);
+ assert(to->gtGetOp2IfPresent() == nullptr);
}
return;
@@ -6863,8 +6852,8 @@ void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
CopyTestDataToCloneTree(from->gtBoundsChk.gtIndex, to->gtBoundsChk.gtIndex);
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
return;
default:
@@ -9175,10 +9164,6 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
{
chars += printf("[RELOP_QMARK]");
}
- if (tree->gtFlags & GTF_RELOP_SMALL)
- {
- chars += printf("[RELOP_SMALL]");
- }
break;
case GT_QMARK:
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index d8cd491..4239cf6 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -268,10 +268,6 @@ public:
unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
unsigned char lvVolatileHint : 1; // hint for AssertionProp
#endif
-#if FANCY_ARRAY_OPT
- unsigned char lvAssignOne : 1; // assigned at least once?
- unsigned char lvAssignTwo : 1; // assigned at least twice?
-#endif
unsigned char lvSpilled : 1; // enregistered variable was spilled
#ifndef _TARGET_64BIT_
@@ -322,6 +318,7 @@ public:
// type of an arg node is TYP_BYREF and a local node is TYP_SIMD*.
unsigned char lvSIMDType : 1; // This is a SIMD struct
unsigned char lvUsedInSIMDIntrinsic : 1; // This tells lclvar is used for simd intrinsic
+ var_types lvBaseType : 5; // Note: this only packs because var_types is a typedef of unsigned char
#endif // FEATURE_SIMD
unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
@@ -330,9 +327,6 @@ public:
// local.
unsigned lvParentLcl; // The index of the local var representing the parent (i.e. the promoted struct local).
// Valid on promoted struct local fields.
-#ifdef FEATURE_SIMD
- var_types lvBaseType; // The base type of a SIMD local var. Valid on TYP_SIMD locals.
-#endif // FEATURE_SIMD
};
unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc.
@@ -676,7 +670,7 @@ public:
#endif // defined(_TARGET_64BIT_)
}
- unsigned lvSize() // Size needed for storage representation. Only used for structs or TYP_BLK.
+ unsigned lvSize() const // Size needed for storage representation. Only used for structs or TYP_BLK.
{
// TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
// where the struct itself is no longer used because all access is via its member fields.
@@ -694,7 +688,8 @@ public:
#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
// For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
- // this for arguments, which must be passed according the defined ABI.
+ // this for arguments, which must be passed according to the defined ABI. We don't want to do this for
+ // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16().
if ((lvType == TYP_SIMD12) && !lvIsParam)
{
assert(lvExactSize == 12);
@@ -711,10 +706,6 @@ public:
BYTE* lvGcLayout; // GC layout info for structs
-#if FANCY_ARRAY_OPT
- GenTreePtr lvKnownDim; // array size if known
-#endif
-
#if ASSERTION_PROP
BlockSet lvRefBlks; // Set of blocks that contain refs
GenTreePtr lvDefStmt; // Pointer to the statement with the single definition
@@ -1195,6 +1186,11 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
+#if defined(UNIX_X86_ABI)
+ unsigned padStkAlign; // Number of padding slots for stack alignment. For each call, only the first
+                       // argument may carry a value used to emit "sub esp, n" to adjust the stack before
+                       // pushing the argument.
+#endif
bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
@@ -1272,6 +1268,10 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
+#if defined(UNIX_X86_ABI)
+ unsigned padStkAlign; // Number of padding slots for stack alignment. This value is used to restore the
+                       // stack pointer to its pre-adjustment value after each call
+#endif
unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
@@ -1321,6 +1321,10 @@ public:
void ArgsComplete();
+#if defined(UNIX_X86_ABI)
+ void ArgsAlignPadding();
+#endif
+
void SortArgs();
void EvalArgsToTemps();
@@ -1340,6 +1344,12 @@ public:
{
return nextSlotNum;
}
+#if defined(UNIX_X86_ABI)
+ unsigned GetPadStackAlign()
+ {
+ return padStkAlign;
+ }
+#endif
bool HasRegArgs()
{
return hasRegArgs;
@@ -1352,6 +1362,9 @@ public:
{
return argsComplete;
}
+
+ // Get the late arg for arg at position argIndex. Caller must ensure this position has a late arg.
+ GenTreePtr GetLateArg(unsigned argIndex);
};
#ifdef DEBUG
@@ -1771,7 +1784,11 @@ public:
// a PSPSym for functions with any EH.
bool ehNeedsPSPSym() const
{
+#ifdef _TARGET_X86_
+ return false;
+#else // _TARGET_X86_
return compHndBBtabCount > 0;
+#endif // _TARGET_X86_
}
bool ehAnyFunclets(); // Are there any funclets in this function?
@@ -1936,6 +1953,11 @@ public:
GenTreePtr gtNewOneConNode(var_types type);
+#ifdef FEATURE_SIMD
+ GenTreePtr gtNewSIMDVectorZero(var_types simdType, var_types baseType, unsigned size);
+ GenTreePtr gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size);
+#endif
+
GenTreeBlk* gtNewBlkOpNode(
genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile);
@@ -1981,6 +2003,7 @@ public:
SIMDIntrinsicID simdIntrinsicID,
var_types baseType,
unsigned size);
+ void SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op);
#endif
GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
@@ -2063,13 +2086,13 @@ public:
bool gtHasLocalsWithAddrOp(GenTreePtr tree);
- unsigned gtHashValue(GenTree* tree);
-
unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs);
void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
#ifdef DEBUG
+ unsigned gtHashValue(GenTree* tree);
+
GenTreePtr gtWalkOpEffectiveVal(GenTreePtr op);
#endif
@@ -2653,6 +2676,35 @@ public:
bool lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc);
bool lvaIsGCTracked(const LclVarDsc* varDsc);
+#if defined(FEATURE_SIMD)
+ bool lvaMapSimd12ToSimd16(const LclVarDsc* varDsc)
+ {
+ assert(varDsc->lvType == TYP_SIMD12);
+ assert(varDsc->lvExactSize == 12);
+
+#if defined(_TARGET_64BIT_)
+ assert(varDsc->lvSize() == 16);
+ return true;
+#else // !defined(_TARGET_64BIT_)
+
+ // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. lvSize()
+ // already does this calculation. However, we also need to prevent mapping types if the var is a
+ // dependently promoted struct field, which must retain its exact size within its parent struct.
+ // However, we don't know this until late, so we may have already pretended the field is bigger
+ // before that.
+ if ((varDsc->lvSize() == 16) && !lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+#endif // !defined(_TARGET_64BIT_)
+ }
+#endif // defined(FEATURE_SIMD)
+
BYTE* lvaGetGcLayout(unsigned varNum);
bool lvaTypeIsGC(unsigned varNum);
unsigned lvaGSSecurityCookie; // LclVar number
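
The new lvaMapSimd12ToSimd16 helper above encodes a sizing rule that can be stated on its own: a TYP_SIMD12 local gets 16 bytes of storage, except that on 32-bit targets parameters and dependently promoted struct fields must keep their exact 12-byte layout. A sketch under those assumptions, with plain booleans in place of LclVarDsc:

// Sketch only: returns the storage size chosen for a 12-byte SIMD local.
unsigned simd12StorageSize(bool is64BitTarget, bool isParam, bool isDependentlyPromotedField)
{
    if (is64BitTarget)
    {
        return 16; // always padded to 16 bytes on 64-bit targets
    }
    return (isParam || isDependentlyPromotedField) ? 12 : 16;
}
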
@@ -2697,21 +2749,21 @@ protected:
static fgWalkPreFn lvaMarkLclRefsCallback;
void lvaMarkLclRefs(GenTreePtr tree);
- // Keeps the mapping from SSA #'s to VN's for the implicit "Heap" variable.
- PerSsaArray lvHeapPerSsaData;
- unsigned lvHeapNumSsaNames;
+ // Keeps the mapping from SSA #'s to VN's for the implicit memory variables.
+ PerSsaArray lvMemoryPerSsaData;
+ unsigned lvMemoryNumSsaNames;
public:
- // Returns the address of the per-Ssa data for "Heap" at the given ssaNum (which is required
+ // Returns the address of the per-Ssa data for memory at the given ssaNum (which is required
// not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
// not an SSA variable).
- LclSsaVarDsc* GetHeapPerSsaData(unsigned ssaNum)
+ LclSsaVarDsc* GetMemoryPerSsaData(unsigned ssaNum)
{
assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
assert(SsaConfig::RESERVED_SSA_NUM == 0);
ssaNum--;
- assert(ssaNum < lvHeapNumSsaNames);
- return &lvHeapPerSsaData.GetRef(ssaNum);
+ assert(ssaNum < lvMemoryNumSsaNames);
+ return &lvMemoryPerSsaData.GetRef(ssaNum);
}
/*
@@ -2780,7 +2832,7 @@ protected:
void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
- bool impCanPInvokeInline(BasicBlock* block);
+ bool impCanPInvokeInline();
bool impCanPInvokeInlineCallSite(BasicBlock* block);
void impCheckForPInvokeCall(
GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
@@ -2831,7 +2883,8 @@ protected:
void impImportLeave(BasicBlock* block);
void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
- GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ GenTreePtr impIntrinsic(GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
int memberRef,
@@ -3425,6 +3478,7 @@ public:
bool fgComputePredsDone; // Have we computed the bbPreds list
bool fgCheapPredsValid; // Is the bbCheapPreds list valid?
bool fgDomsComputed; // Have we computed the dominator sets?
+ bool fgOptimizedFinally; // Did we optimize any try-finallys?
bool fgHasSwitch; // any BBJ_SWITCH jumps?
bool fgHasPostfix; // any postfix ++/-- found?
@@ -3493,8 +3547,20 @@ public:
void fgImport();
+ void fgTransformFatCalli();
+
void fgInline();
+ void fgRemoveEmptyTry();
+
+ void fgRemoveEmptyFinally();
+
+ void fgCloneFinally();
+
+ void fgCleanupContinuation(BasicBlock* continuation);
+
+ void fgUpdateFinallyTargetFlags();
+
GenTreePtr fgGetCritSectOfStaticMethod();
#if !defined(_TARGET_X86_)
@@ -3570,10 +3636,9 @@ public:
void fgLocalVarLivenessInit();
#ifdef LEGACY_BACKEND
- GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode, GenTreePtr asgdLclVar);
+ GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode);
#else
- void fgPerNodeLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
- void fgPerStatementLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+ void fgPerNodeLocalVarLiveness(GenTree* node);
#endif
void fgPerBlockLocalVarLiveness();
@@ -3741,18 +3806,18 @@ public:
// tree node).
void fgValueNumber();
- // Updates "fgCurHeap" via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
+ // Computes new GcHeap VN via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
// Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
// The 'indType' is the indirection type of the lhs of the assignment and will typically
// match the element type of the array or fldSeq. When this type doesn't match
// or if the fldSeq is 'NotAField' we invalidate the array contents H[elemTypeEq][arrVN]
//
- void fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
- ValueNum arrVN,
- ValueNum inxVN,
- FieldSeqNode* fldSeq,
- ValueNum rhsVN,
- var_types indType);
+ ValueNum fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType);
// Requires that "tree" is a GT_IND marked as an array index, and that its address argument
// has been parsed to yield the other input arguments. If evaluation of the address
@@ -3772,33 +3837,43 @@ public:
// Requires "funcApp" to be a VNF_PtrToArrElem, and "addrXvn" to represent the exception set thrown
// by evaluating the array index expression "tree". Returns the value number resulting from
- // dereferencing the array in the current heap state. If "tree" is non-null, it must be the
+ // dereferencing the array in the current GcHeap state. If "tree" is non-null, it must be the
// "GT_IND" that does the dereference, and it is given the returned value number.
ValueNum fgValueNumberArrIndexVal(GenTreePtr tree, struct VNFuncApp* funcApp, ValueNum addrXvn);
+ // Compute the value number for a byref-exposed load of the given type via the given pointerVN.
+ ValueNum fgValueNumberByrefExposedLoad(var_types type, ValueNum pointerVN);
+
unsigned fgVNPassesCompleted; // Number of times fgValueNumber has been run.
// Utility functions for fgValueNumber.
- // Perform value-numbering for the trees in "blk". When giving VN's to the SSA
- // names defined by phi definitions at the start of "blk", "newVNsForPhis" indicates
- // that these should be given new VN's, irrespective of the values of the LHS.
- // If "false", then we may assume that all inputs to phi RHS's of such definitions
- // have already been assigned value numbers; if they are all assigned the *same* value
- // number, then the LHS SSA name gets the same VN.
- void fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis);
+ // Perform value-numbering for the trees in "blk".
+ void fgValueNumberBlock(BasicBlock* blk);
// Requires that "entryBlock" is the entry block of loop "loopNum", and that "loopNum" is the
// innermost loop of which "entryBlock" is the entry. Returns the value number that should be
- // assumed for the heap at the start "entryBlk".
- ValueNum fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned loopNum);
+ // assumed for the memoryKind at the start "entryBlk".
+ ValueNum fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, BasicBlock* entryBlock, unsigned loopNum);
- // Called when an operation (performed by "tree", described by "msg") may cause the global Heap to be mutated.
- void fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg));
+ // Called when an operation (performed by "tree", described by "msg") may cause the GcHeap to be mutated.
+ // As GcHeap is a subset of ByrefExposed, this will also annotate the ByrefExposed mutation.
+ void fgMutateGcHeap(GenTreePtr tree DEBUGARG(const char* msg));
- // Tree caused an update in the current heap VN. If "tree" has an associated heap SSA #, record that
+ // Called when an operation (performed by "tree", described by "msg") may cause an address-exposed local to be
+ // mutated.
+ void fgMutateAddressExposedLocal(GenTreePtr tree DEBUGARG(const char* msg));
+
+ // For a GC heap store at curTree, record the new curMemoryVN's and update curTree's MemorySsaMap.
+ // As GcHeap is a subset of ByrefExposed, this will also record the ByrefExposed store.
+ void recordGcHeapStore(GenTreePtr curTree, ValueNum gcHeapVN DEBUGARG(const char* msg));
+
+ // For a store to an address-exposed local at curTree, record the new curMemoryVN and update curTree's MemorySsaMap.
+ void recordAddressExposedLocalStore(GenTreePtr curTree, ValueNum memoryVN DEBUGARG(const char* msg));
+
+ // Tree caused an update in the current memory VN. If "tree" has an associated heap SSA #, record that
// value in that SSA #.
- void fgValueNumberRecordHeapSsa(GenTreePtr tree);
+ void fgValueNumberRecordMemorySsa(MemoryKind memoryKind, GenTreePtr tree);
// The input 'tree' is a leaf node that is a constant
// Assign the proper value number to the tree
@@ -3837,11 +3912,11 @@ public:
// Requires "helpFunc" to be pure. Returns the corresponding VNFunc.
VNFunc fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc);
- // This is the current value number for the "Heap" implicit variable while
- // doing value numbering. This is the value number under the "liberal" interpretation
- // of heap values; the "conservative" interpretation needs no VN, since every access of
- // the heap yields an unknown value.
- ValueNum fgCurHeapVN;
+ // These are the current value number for the memory implicit variables while
+ // doing value numbering. These are the value numbers under the "liberal" interpretation
+ // of memory values; the "conservative" interpretation needs no VN, since every access of
+ // memory yields an unknown value.
+ ValueNum fgCurMemoryVN[MemoryKindCount];
// Return a "pseudo"-class handle for an array element type. If "elemType" is TYP_STRUCT,
// requires "elemStructType" to be non-null (and to have a low-order zero). Otherwise, low order bit
@@ -4272,6 +4347,7 @@ public:
void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
void fgDebugCheckFlags(GenTreePtr tree);
void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags);
+ void fgDebugCheckTryFinallyExits();
#endif
#ifdef LEGACY_BACKEND
@@ -4524,7 +4600,6 @@ private:
static MorphAddrContext s_CopyBlockMAC;
#ifdef FEATURE_SIMD
- GenTreePtr fgCopySIMDNode(GenTreeSIMD* simdNode);
GenTreePtr getSIMDStructFromField(GenTreePtr tree,
var_types* baseTypeOut,
unsigned* indexOut,
@@ -4613,11 +4688,13 @@ private:
VARSET_TP fgCurUseSet; // vars used by block (before an assignment)
VARSET_TP fgCurDefSet; // vars assigned by block (before a use)
- bool fgCurHeapUse; // True iff the current basic block uses the heap before defining it.
- bool fgCurHeapDef; // True iff the current basic block defines the heap.
- bool fgCurHeapHavoc; // True if the current basic block is known to set the heap to a "havoc" value.
+ MemoryKindSet fgCurMemoryUse; // Set of memory kinds used by the current basic block (before a definition).
+ MemoryKindSet fgCurMemoryDef; // Set of memory kinds modified by the current basic block.
+ MemoryKindSet fgCurMemoryHavoc; // Set of memory kinds known to be set to a "havoc" value by the current block.
- void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
+ bool byrefStatesMatchGcHeapStates; // True iff GcHeap and ByrefExposed memory have all the same def points.
+
+ void fgMarkUseDef(GenTreeLclVarCommon* tree);
void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
@@ -4686,6 +4763,9 @@ private:
#ifdef DEBUG
static fgWalkPreFn fgDebugCheckInlineCandidates;
+
+ void CheckNoFatPointerCandidatesLeft();
+ static fgWalkPreFn fgDebugCheckFatPointerCandidates;
#endif
void fgPromoteStructs();
@@ -4968,9 +5048,10 @@ public:
#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
// type are assigned to.
- bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
- // If this is set, the fields below may not be accurate (since they become irrelevant.)
- bool lpContainsCall; // True if executing the loop body *may* execute a call
+ bool lpLoopHasMemoryHavoc[MemoryKindCount]; // The loop contains an operation that we assume has arbitrary
+ // memory side effects. If this is set, the fields below
+ // may not be accurate (since they become irrelevant.)
+ bool lpContainsCall; // True if executing the loop body *may* execute a call
VARSET_TP lpVarInOut; // The set of variables that are IN or OUT during the execution of this loop
VARSET_TP lpVarUseDef; // The set of variables that are USE or DEF during the execution of this loop
@@ -5307,6 +5388,9 @@ protected:
treeStmtLstPtr csdTreeList; // list of matching tree nodes: head
treeStmtLstPtr csdTreeLast; // list of matching tree nodes: tail
+
+ ValueNum defConservativeVN; // if all def occurrences share the same conservative value
+ // number, this will reflect it; otherwise, NoVN.
};
static const size_t s_optCSEhashSize;
@@ -5462,11 +5546,27 @@ public:
}
};
-#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
-#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
-#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
-#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
-#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
+#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
+#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
+#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
+#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+#define OMF_HAS_FATPOINTER 0x00000020 // Method contains call, that needs fat pointer transformation.
+
+ bool doesMethodHaveFatPointer()
+ {
+ return (optMethodFlags & OMF_HAS_FATPOINTER) != 0;
+ }
+
+ void setMethodHasFatPointer()
+ {
+ optMethodFlags |= OMF_HAS_FATPOINTER;
+ }
+
+ void clearMethodHasFatPointer()
+ {
+ optMethodFlags &= ~OMF_HAS_FATPOINTER;
+ }
unsigned optMethodFlags;
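
The OMF_HAS_FATPOINTER accessors above follow the usual bit-flag idiom for method flags; a compressed standalone sketch of the same pattern, with invented names:

struct MethodFlags
{
    static const unsigned HAS_FATPOINTER = 0x00000020;

    unsigned flags = 0;

    bool hasFatPointer() const { return (flags & HAS_FATPOINTER) != 0; }
    void setFatPointer()       { flags |= HAS_FATPOINTER; }
    void clearFatPointer()     { flags &= ~HAS_FATPOINTER; }
};
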
@@ -5931,10 +6031,6 @@ protected:
ssize_t optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk));
GenTreePtr optFindLocalInit(BasicBlock* block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* isKilledAfterInit);
-#if FANCY_ARRAY_OPT
- bool optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1 = 0, int add2 = 0);
-#endif
-
bool optReachWithoutCall(BasicBlock* srcBB, BasicBlock* dstBB);
protected:
@@ -6845,10 +6941,15 @@ private:
void unwindReserveFunc(FuncInfoDsc* func);
void unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
-#if defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && FEATURE_EH_FUNCLETS)
void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);
+
+#endif // _TARGET_AMD64_ || (_TARGET_X86_ && FEATURE_EH_FUNCLETS)
+
+#if defined(_TARGET_AMD64_)
+
UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
void unwindBegPrologWindows();
@@ -6932,6 +7033,20 @@ private:
// Should we support SIMD intrinsics?
bool featureSIMD;
+ // Have we identified any SIMD types?
+ // This is currently used by struct promotion to avoid getting type information for a struct
+ // field to see if it is a SIMD type, if we haven't seen any SIMD types or operations in
+ // the method.
+ bool _usesSIMDTypes;
+ bool usesSIMDTypes()
+ {
+ return _usesSIMDTypes;
+ }
+ void setUsesSIMDTypes(bool value)
+ {
+ _usesSIMDTypes = value;
+ }
+
// This is a temp lclVar allocated on the stack as TYP_SIMD. It is used to implement intrinsics
// that require indexed access to the individual fields of the vector, which is not well supported
// by the hardware. It is allocated when/if such situations are encountered during Lowering.
@@ -7121,6 +7236,9 @@ private:
GenTree** op1,
GenTree** op2);
+ // Creates a GT_SIMD tree for Abs intrinsic.
+ GenTreePtr impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned simdVectorSize, GenTree* op1);
+
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain == comparison result.
@@ -7623,8 +7741,6 @@ public:
static const bool compNoPInvokeInlineCB;
#endif
- bool compMustInlinePInvokeCalli; // Unmanaged CALLI in IL stubs must be inlined
-
#ifdef DEBUG
bool compGcChecks; // Check arguments and return values to ensure they are sane
bool compStackCheckOnRet; // Check ESP on return to ensure it is correct
@@ -7783,11 +7899,22 @@ public:
/* These should not be exhaustively used as they might */ \
/* hide/trivialize other areas */ \
\
- STRESS_MODE(REGS) STRESS_MODE(DBL_ALN) STRESS_MODE(LCL_FLDS) STRESS_MODE(UNROLL_LOOPS) \
- STRESS_MODE(MAKE_CSE) STRESS_MODE(LEGACY_INLINE) STRESS_MODE(CLONE_EXPR) \
- STRESS_MODE(USE_FCOMI) STRESS_MODE(USE_CMOV) STRESS_MODE(FOLD) \
- STRESS_MODE(BB_PROFILE) STRESS_MODE(OPT_BOOLS_GC) STRESS_MODE(REMORPH_TREES) \
- STRESS_MODE(64RSLT_MUL) STRESS_MODE(DO_WHILE_LOOPS) STRESS_MODE(MIN_OPTS) \
+ STRESS_MODE(REGS) \
+ STRESS_MODE(DBL_ALN) \
+ STRESS_MODE(LCL_FLDS) \
+ STRESS_MODE(UNROLL_LOOPS) \
+ STRESS_MODE(MAKE_CSE) \
+ STRESS_MODE(LEGACY_INLINE) \
+ STRESS_MODE(CLONE_EXPR) \
+ STRESS_MODE(USE_FCOMI) \
+ STRESS_MODE(USE_CMOV) \
+ STRESS_MODE(FOLD) \
+ STRESS_MODE(BB_PROFILE) \
+ STRESS_MODE(OPT_BOOLS_GC) \
+ STRESS_MODE(REMORPH_TREES) \
+ STRESS_MODE(64RSLT_MUL) \
+ STRESS_MODE(DO_WHILE_LOOPS) \
+ STRESS_MODE(MIN_OPTS) \
STRESS_MODE(REVERSE_FLAG) /* Will set GTF_REVERSE_OPS whenever we can */ \
STRESS_MODE(REVERSE_COMMA) /* Will reverse commas created with gtNewCommaNode */ \
STRESS_MODE(TAILCALL) /* Will make the call as a tailcall whenever legal */ \
@@ -7796,17 +7923,23 @@ public:
STRESS_MODE(NULL_OBJECT_CHECK) \
STRESS_MODE(PINVOKE_RESTORE_ESP) \
STRESS_MODE(RANDOM_INLINE) \
+ STRESS_MODE(SWITCH_CMP_BR_EXPANSION) \
+ STRESS_MODE(GENERIC_VARN) \
+ \
+ /* After COUNT_VARN, stress level 2 does all of these all the time */ \
\
- STRESS_MODE(GENERIC_VARN) STRESS_MODE(COUNT_VARN) \
+ STRESS_MODE(COUNT_VARN) \
\
/* "Check" stress areas that can be exhaustively used if we */ \
/* dont care about performance at all */ \
\
STRESS_MODE(FORCE_INLINE) /* Treat every method as AggressiveInlining */ \
STRESS_MODE(CHK_FLOW_UPDATE) \
- STRESS_MODE(EMITTER) STRESS_MODE(CHK_REIMPORT) STRESS_MODE(FLATFP) \
- \
- STRESS_MODE(GENERIC_CHECK) STRESS_MODE(COUNT) \
+ STRESS_MODE(EMITTER) \
+ STRESS_MODE(CHK_REIMPORT) \
+ STRESS_MODE(FLATFP) \
+ STRESS_MODE(GENERIC_CHECK) \
+ STRESS_MODE(COUNT)
enum compStressArea
{
@@ -8951,21 +9084,28 @@ public:
return compRoot->m_arrayInfoMap;
}
- NodeToUnsignedMap* m_heapSsaMap;
+ NodeToUnsignedMap* m_memorySsaMap[MemoryKindCount];
- // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
- // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
- // all the possible heap states are possible initial states of the corresponding catch block(s).)
- NodeToUnsignedMap* GetHeapSsaMap()
+ // In some cases, we want to assign intermediate SSA #'s to memory states, and know what nodes create those memory
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the memory
+ // state, all the possible memory states are possible initial states of the corresponding catch block(s).)
+ NodeToUnsignedMap* GetMemorySsaMap(MemoryKind memoryKind)
{
+ if (memoryKind == GcHeap && byrefStatesMatchGcHeapStates)
+ {
+ // Use the same map for GCHeap and ByrefExposed when their states match.
+ memoryKind = ByrefExposed;
+ }
+
+ assert(memoryKind < MemoryKindCount);
Compiler* compRoot = impInlineRoot();
- if (compRoot->m_heapSsaMap == nullptr)
+ if (compRoot->m_memorySsaMap[memoryKind] == nullptr)
{
// Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
- IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
- compRoot->m_heapSsaMap = new (ialloc) NodeToUnsignedMap(ialloc);
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_memorySsaMap[memoryKind] = new (ialloc) NodeToUnsignedMap(ialloc);
}
- return compRoot->m_heapSsaMap;
+ return compRoot->m_memorySsaMap[memoryKind];
}
// The Refany type is the only struct type whose structure is implicitly assumed by IL. We need its fields.
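
The GetMemorySsaMap rewrite above does two things: it lazily allocates one node-to-SSA-number map per memory kind, and it aliases GcHeap to ByrefExposed whenever their definition points are known to coincide. A standalone sketch of that shape, with std::map standing in for NodeToUnsignedMap and without the CompAllocator plumbing:

#include <map>

enum MemoryKind
{
    ByrefExposed = 0,
    GcHeap,
    MemoryKindCount
};

struct MemorySsaMaps
{
    bool                             byrefStatesMatchGcHeapStates = false;
    std::map<const void*, unsigned>* maps[MemoryKindCount]        = {};

    std::map<const void*, unsigned>* getMemorySsaMap(MemoryKind memoryKind)
    {
        if ((memoryKind == GcHeap) && byrefStatesMatchGcHeapStates)
        {
            memoryKind = ByrefExposed; // share one map when the two states always match
        }
        if (maps[memoryKind] == nullptr)
        {
            maps[memoryKind] = new std::map<const void*, unsigned>();
        }
        return maps[memoryKind];
    }
};
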
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index e8358fd..6baf601 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -500,6 +500,52 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
return regNum;
}
+//------------------------------------------------------------------------------
+// genTypeCanRepresentValue: Checks if a value can be represented by a given type.
+//
+// Arguments:
+// value - the value to check
+// type - the type
+//
+// Return Value:
+// True if the value is representable, false otherwise.
+//
+// Notes:
+// If the type is not integral or ref-like (ref/byref/array), then false is
+// always returned.
+
+template <typename TValue>
+inline bool genTypeCanRepresentValue(var_types type, TValue value)
+{
+ switch (type)
+ {
+ case TYP_UBYTE:
+ case TYP_BOOL:
+ return FitsIn<UINT8>(value);
+ case TYP_BYTE:
+ return FitsIn<INT8>(value);
+ case TYP_USHORT:
+ case TYP_CHAR:
+ return FitsIn<UINT16>(value);
+ case TYP_SHORT:
+ return FitsIn<INT16>(value);
+ case TYP_UINT:
+ return FitsIn<UINT32>(value);
+ case TYP_INT:
+ return FitsIn<INT32>(value);
+ case TYP_ULONG:
+ return FitsIn<UINT64>(value);
+ case TYP_LONG:
+ return FitsIn<INT64>(value);
+ case TYP_REF:
+ case TYP_BYREF:
+ case TYP_ARRAY:
+ return FitsIn<UINT_PTR>(value);
+ default:
+ return false;
+ }
+}
+
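
As a usage note for genTypeCanRepresentValue above: within this change its main client is the assert in genCompareInt that a contained integer constant fits in the chosen compare type. For readers without the JIT's FitsIn template at hand, a minimal standalone equivalent of the small-type and int cases (assuming two's-complement ranges) is:

#include <cassert>
#include <cstdint>

// Sketch only: the real helper is driven by var_types and FitsIn<T>.
bool fitsInIntegerType(int64_t value, unsigned sizeBytes, bool isUnsigned)
{
    assert((sizeBytes == 1) || (sizeBytes == 2) || (sizeBytes == 4));
    if (isUnsigned)
    {
        uint64_t maxVal = (1ull << (sizeBytes * 8)) - 1;
        return (value >= 0) && (static_cast<uint64_t>(value) <= maxVal);
    }
    int64_t maxVal = (1ll << (sizeBytes * 8 - 1)) - 1;
    int64_t minVal = -maxVal - 1;
    return (value >= minVal) && (value <= maxVal);
}
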
/*****************************************************************************
*
* Return the size in bytes of the given type.
@@ -1137,7 +1183,6 @@ inline GenTreePtr Compiler::gtNewFieldRef(
tree->gtField.gtFldObj = obj;
tree->gtField.gtFldHnd = fldHnd;
tree->gtField.gtFldOffset = offset;
- tree->gtFlags |= GTF_GLOB_REF;
#ifdef FEATURE_READYTORUN_COMPILER
tree->gtField.gtFieldLookup.addr = nullptr;
@@ -1154,6 +1199,18 @@ inline GenTreePtr Compiler::gtNewFieldRef(
{
unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
lvaTable[lclNum].lvFieldAccessed = 1;
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // These structs are passed by reference; we should probably be able to treat these
+ // as non-global refs, but downstream logic expects these to be marked this way.
+ if (lvaTable[lclNum].lvIsParam)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ }
+ else
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
}
return tree;
@@ -4626,15 +4683,14 @@ inline void BasicBlock::InitVarSets(Compiler* comp)
{
VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp));
- VarSetOps::AssignNoCopy(comp, bbVarTmp, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp));
- bbHeapUse = false;
- bbHeapDef = false;
- bbHeapLiveIn = false;
- bbHeapLiveOut = false;
+ bbMemoryUse = emptyMemoryKindSet;
+ bbMemoryDef = emptyMemoryKindSet;
+ bbMemoryLiveIn = emptyMemoryKindSet;
+ bbMemoryLiveOut = emptyMemoryKindSet;
}
// Returns true if the basic block ends with GT_JMP
diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h
index e27d207..b22bf6d 100644
--- a/src/jit/compmemkind.h
+++ b/src/jit/compmemkind.h
@@ -39,7 +39,7 @@ CompMemKindMacro(IndirAssignMap)
CompMemKindMacro(FieldSeqStore)
CompMemKindMacro(ZeroOffsetFieldMap)
CompMemKindMacro(ArrayInfoMap)
-CompMemKindMacro(HeapPhiArg)
+CompMemKindMacro(MemoryPhiArg)
CompMemKindMacro(CSE)
CompMemKindMacro(GC)
CompMemKindMacro(CorSig)
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
index ac1bb63..5038d6e 100644
--- a/src/jit/compphases.h
+++ b/src/jit/compphases.h
@@ -11,9 +11,10 @@
// corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro
// after the last use.
// The arguments are:
-// CompPhaseNameMacro(enumName, stringName, hasChildren, parent)
+// CompPhaseNameMacro(enumName, stringName, shortName, hasChildren, parent)
// "enumName" is an Enumeration-style all-caps name.
// "stringName" is a self-explanatory.
+// "shortName" is an abbreviated form for stringName
// "hasChildren" is true if this phase is broken out into subphases.
// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
@@ -25,6 +26,9 @@ CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import",
CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1)
CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1)
CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1)
+CompPhaseNameMacro(PHASE_EMPTY_TRY, "Remove empty try", "EMPTYTRY", false, -1)
+CompPhaseNameMacro(PHASE_EMPTY_FINALLY, "Remove empty finally", "EMPTYFIN", false, -1)
+CompPhaseNameMacro(PHASE_CLONE_FINALLY, "Clone finally", "CLONEFIN", false, -1)
CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1)
CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1)
CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1)
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 98b8b08..407ae1c 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -249,6 +249,12 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
nextNode = DecomposeRotate(use);
break;
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ nextNode = DecomposeSimd(use);
+ break;
+#endif // FEATURE_SIMD
+
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
@@ -411,6 +417,8 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
Range().InsertAfter(loResult, hiResult);
+ m_compiler->lvaIncRefCnts(hiResult);
+
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
@@ -1560,6 +1568,163 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use)
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
+#ifdef FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// DecomposeSimd: Decompose GT_SIMD.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeSimd(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_SIMD);
+
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicGetItem:
+ return DecomposeSimdGetItem(use);
+
+ default:
+ noway_assert(!"unexpected GT_SIMD node in long decomposition");
+ break;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// DecomposeSimdGetItem: Decompose GT_SIMD -- SIMDIntrinsicGetItem.
+//
+// Decompose a get[i] node on Vector<long>. For:
+//
+// GT_SIMD{get_item}[long](simd_var, index)
+//
+// create:
+//
+// tmp_simd_var = simd_var
+// tmp_index = index
+// loResult = GT_SIMD{get_item}[int](tmp_simd_var, tmp_index * 2)
+// hiResult = GT_SIMD{get_item}[int](tmp_simd_var, tmp_index * 2 + 1)
+// return: GT_LONG(loResult, hiResult)
+//
+// This isn't optimal codegen, since SIMDIntrinsicGetItem sometimes requires
+// temps that could be shared between the two extractions.
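+// As a hedged, concrete instance of the pattern above (not taken from a
+// real dump): with a constant index of 1 on a Vector<long>, the low half
+// becomes GT_SIMD{get_item}[int](tmp_simd_var, 2) and the high half
+// becomes GT_SIMD{get_item}[int](tmp_simd_var, 3).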
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeSimdGetItem(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_SIMD);
+
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ var_types baseType = simdTree->gtSIMDBaseType;
+ unsigned simdSize = simdTree->gtSIMDSize;
+
+ assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
+ assert(varTypeIsLong(baseType));
+ assert(varTypeIsLong(simdTree));
+ assert(varTypeIsSIMD(simdTree->gtOp.gtOp1->gtType));
+ assert(simdTree->gtOp.gtOp2->gtType == TYP_INT);
+
+ bool indexIsConst = simdTree->gtOp.gtOp2->IsCnsIntOrI();
+ ssize_t index = 0;
+ if (indexIsConst)
+ {
+ index = simdTree->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+
+ LIR::Use op1(Range(), &simdTree->gtOp.gtOp1, simdTree);
+ unsigned simdTmpVarNum = op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeSimdGetItem]: Saving op1 tree to a temp var:\n");
+ DISPTREERANGE(Range(), op1.Def());
+
+ unsigned indexTmpVarNum = 0;
+ if (!indexIsConst)
+ {
+ LIR::Use op2(Range(), &simdTree->gtOp.gtOp2, simdTree);
+ indexTmpVarNum = op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeSimdGetItem]: Saving op2 tree to a temp var:\n");
+ DISPTREERANGE(Range(), op2.Def());
+ }
+
+ // Create:
+ // loResult = GT_SIMD{get_item}[int](tmp_simd_var, index * 2)
+
+ GenTree* simdTmpVar1 = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTree->gtOp.gtOp1->gtType);
+ GenTree* indexTimesTwo1;
+
+ if (indexIsConst)
+ {
+ // Reuse the existing index constant node.
+ indexTimesTwo1 = simdTree->gtOp.gtOp2;
+ Range().Remove(indexTimesTwo1);
+ indexTimesTwo1->gtIntCon.gtIconVal = index * 2;
+
+ Range().InsertBefore(simdTree, simdTmpVar1, indexTimesTwo1);
+ }
+ else
+ {
+ GenTree* indexTmpVar1 = m_compiler->gtNewLclLNode(indexTmpVarNum, TYP_INT);
+ GenTree* two1 = m_compiler->gtNewIconNode(2, TYP_INT);
+ indexTimesTwo1 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, indexTmpVar1, two1);
+ Range().InsertBefore(simdTree, simdTmpVar1, indexTmpVar1, two1, indexTimesTwo1);
+ }
+
+ GenTree* loResult =
+ m_compiler->gtNewSIMDNode(TYP_INT, simdTmpVar1, indexTimesTwo1, SIMDIntrinsicGetItem, TYP_INT, simdSize);
+ Range().InsertBefore(simdTree, loResult);
+
+ // Create:
+ // hiResult = GT_SIMD{get_item}[int](tmp_simd_var, index * 2 + 1)
+
+ GenTree* simdTmpVar2 = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTree->gtOp.gtOp1->gtType);
+ GenTree* indexTimesTwoPlusOne;
+
+ if (indexIsConst)
+ {
+ indexTimesTwoPlusOne = m_compiler->gtNewIconNode(index * 2 + 1, TYP_INT);
+ Range().InsertBefore(simdTree, simdTmpVar2, indexTimesTwoPlusOne);
+ }
+ else
+ {
+ GenTree* indexTmpVar2 = m_compiler->gtNewLclLNode(indexTmpVarNum, TYP_INT);
+ GenTree* two2 = m_compiler->gtNewIconNode(2, TYP_INT);
+ GenTree* indexTimesTwo2 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, indexTmpVar2, two2);
+ GenTree* one = m_compiler->gtNewIconNode(1, TYP_INT);
+ indexTimesTwoPlusOne = m_compiler->gtNewOperNode(GT_ADD, TYP_INT, indexTimesTwo2, one);
+ Range().InsertBefore(simdTree, simdTmpVar2, indexTmpVar2, two2, indexTimesTwo2);
+ Range().InsertBefore(simdTree, one, indexTimesTwoPlusOne);
+ }
+
+ GenTree* hiResult =
+ m_compiler->gtNewSIMDNode(TYP_INT, simdTmpVar2, indexTimesTwoPlusOne, SIMDIntrinsicGetItem, TYP_INT, simdSize);
+ Range().InsertBefore(simdTree, hiResult);
+
+ // Done with the original tree; remove it.
+
+ Range().Remove(simdTree);
+
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
+
+#endif // FEATURE_SIMD
+
//------------------------------------------------------------------------
// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't,
// store the node to a var. Then decompose the new LclVar.
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index 8965a0b..ff4f4ac 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -55,6 +55,8 @@ private:
GenTree* DecomposeRotate(LIR::Use& use);
GenTree* DecomposeMul(LIR::Use& use);
GenTree* DecomposeUMod(LIR::Use& use);
+ GenTree* DecomposeSimd(LIR::Use& use);
+ GenTree* DecomposeSimdGetItem(LIR::Use& use);
// Helper functions
GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter);
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index dcadaa9..d5705ab 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -409,13 +409,16 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
{
if (JitConfig.EnableAVX() != 0)
{
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 32\n");
return 32;
}
}
#endif // FEATURE_AVX_SUPPORT
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 16\n");
return 16;
#endif // _TARGET_XARCH_
#else // !FEATURE_SIMD
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 0\n");
return 0;
#endif // !FEATURE_SIMD
}
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 0929b73..1e566b2 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -1643,12 +1643,10 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
{
igPh->igFlags |= IGF_FUNCLET_PROLOG;
}
-#ifdef DEBUG
else if (igType == IGPT_FUNCLET_EPILOG)
{
igPh->igFlags |= IGF_FUNCLET_EPILOG;
}
-#endif // DEBUG
#endif // FEATURE_EH_FUNCLETS
/* Link it into the placeholder list */
diff --git a/src/jit/emit.h b/src/jit/emit.h
index 5b1a395..f57cc0a 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -270,16 +270,14 @@ struct insGroup
#define IGF_FINALLY_TARGET 0x0004 // this group is the start of a basic block that is returned to after a finally.
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
#define IGF_FUNCLET_PROLOG 0x0008 // this group belongs to a funclet prolog
-#ifdef DEBUG
-#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog. Currently, this is only needed for DEBUG.
-#endif
-#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
-#define IGF_NOGCINTERRUPT 0x0040 // this IG is is a no-interrupt region (prolog, epilog, etc.)
-#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
-#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
-#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
- // because the codegen block was too big. Also used for
- // placeholder IGs that aren't also labels.
+#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog.
+#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
+#define IGF_NOGCINTERRUPT 0x0040 // this IG is a no-interrupt region (prolog, epilog, etc.)
+#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
+#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
+#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
+ // because the codegen block was too big. Also used for
+ // placeholder IGs that aren't also labels.
// Mask of IGF_* flags that should be propagated to new blocks when they are created.
// This allows prologs and epilogs to be any number of IGs, but still be
@@ -491,12 +489,11 @@ protected:
return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
}
-#ifdef DEBUG
bool emitIGisInFuncletEpilog(const insGroup* ig)
{
return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_EPILOG) != 0);
}
-#endif // DEBUG
+
#endif // FEATURE_EH_FUNCLETS
// If "ig" corresponds to the start of a basic block that is the
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index 45928ca..1b3ef1b 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -7536,31 +7536,53 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
switch (node->OperGet())
{
case GT_IND:
- {
- GenTree* addr = node->gtGetOp1();
- assert(!addr->isContained());
- codeGen->genConsumeReg(addr);
- emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum);
- }
- break;
-
case GT_STOREIND:
{
- GenTree* addr = node->gtGetOp1();
- GenTree* data = node->gtOp.gtOp2;
+ GenTreeIndir* indir = node->AsIndir();
+ GenTree* addr = indir->Addr();
+ GenTree* data = indir->gtOp.gtOp2;
- assert(!addr->isContained());
- assert(!data->isContained());
- codeGen->genConsumeReg(addr);
- codeGen->genConsumeReg(data);
+ regNumber reg = (node->OperGet() == GT_IND) ? node->gtRegNum : data->gtRegNum;
- if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ if (addr->isContained())
{
- emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+
+ int offset = 0;
+ DWORD lsl = 0;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ offset = (int)addr->AsAddrMode()->gtOffset;
+ if (addr->AsAddrMode()->gtScale > 0)
+ {
+ assert(isPow2(addr->AsAddrMode()->gtScale));
+ BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ }
+ }
+
+ GenTree* memBase = indir->Base();
+
+ if (indir->HasIndex())
+ {
+ NYI_ARM("emitInsMov HasIndex");
+ }
+ else
+ {
+ // TODO check offset is valid for encoding
+ emitIns_R_R_I(ins, attr, reg, memBase->gtRegNum, offset);
+ }
}
else
{
- emitIns_R_R(ins, attr, addr->gtRegNum, data->gtRegNum);
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, reg, addr->gtRegNum);
+ }
}
}
break;
@@ -7581,7 +7603,6 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
else
{
assert(!data->isContained());
- codeGen->genConsumeReg(data);
emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
codeGen->genUpdateLife(varNode);
}
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index 12c4087..dd4bac8 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -10892,7 +10892,6 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
}
else // addr is not contained, so we evaluate it into a register
{
- codeGen->genConsumeReg(addr);
// Then load/store dataReg from/to [addrReg]
emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum);
}
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index b6bacfa..be5cefb 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -57,10 +57,6 @@ bool emitter::IsAVXInstruction(instruction ins)
#endif
}
-#ifdef _TARGET_AMD64_
-#define REX_PREFIX_MASK 0xFF00000000LL
-#endif // _TARGET_AMD64_
-
#ifdef FEATURE_AVX_SUPPORT
// Returns true if the AVX instruction is a binary operator that requires 3 operands.
// When we emit an instruction with only two operands, we will duplicate the destination
@@ -717,12 +713,10 @@ unsigned emitter::emitGetPrefixSize(code_t code)
return 3;
}
-#ifdef _TARGET_AMD64_
- if (code & REX_PREFIX_MASK)
+ if (hasRexPrefix(code))
{
return 1;
}
-#endif // _TARGET_AMD64_
return 0;
}
@@ -898,7 +892,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
// The following SSE2 instructions write to a general purpose integer register.
if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
#ifndef LEGACY_BACKEND
- || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si
+ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_pmovmskb ||
+ ins == INS_pextrw
#endif // !LEGACY_BACKEND
)
{
@@ -1881,10 +1876,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
}
}
-#ifdef _TARGET_AMD64_
size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
- if (code & REX_PREFIX_MASK)
+ if (hasRexPrefix(code))
{
// REX prefix
size += emitGetRexPrefixSize(ins);
@@ -1899,7 +1893,6 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
// Should have a REX byte
size += emitGetRexPrefixSize(ins);
}
-#endif // _TARGET_AMD64_
if (rgx == REG_NA)
{
@@ -2302,9 +2295,7 @@ void emitter::emitIns(instruction ins)
}
#endif // DEBUG
-#ifdef _TARGET_AMD64_
- assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right?
-#endif // _TARGET_AMD64_
+ assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
if (code & 0xFF000000)
{
@@ -2786,20 +2777,19 @@ CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr at
regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
{
// dst can only be a reg or modrm
- assert(!dst->isContained() || dst->isContainedMemoryOp() ||
- instrIs3opImul(ins)); // dst on these isn't really the dst
+ assert(!dst->isContained() || dst->isUsedFromMemory() || instrIs3opImul(ins)); // dst on these isn't really the dst
#ifdef DEBUG
// src can be anything but both src and dst cannot be addr modes
// or at least cannot be contained addr modes
- if (dst->isContainedMemoryOp())
+ if (dst->isUsedFromMemory())
{
- assert(!src->isContainedMemoryOp());
+ assert(!src->isUsedFromMemory());
}
- if (src->isContainedMemoryOp())
+ if (src->isUsedFromMemory())
{
- assert(!dst->isContainedMemoryOp());
+ assert(!dst->isUsedFromMemory());
}
#endif
@@ -2837,7 +2827,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find local field if any
GenTreeLclFld* lclField = nullptr;
- if (src->isContainedLclField())
+ if (src->isLclFldUsedFromMemory())
{
lclField = src->AsLclFld();
}
@@ -2848,12 +2838,12 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find contained lcl var if any
GenTreeLclVar* lclVar = nullptr;
- if (src->isContainedLclVar())
+ if (src->isLclVarUsedFromMemory())
{
assert(src->IsRegOptional());
lclVar = src->AsLclVar();
}
- else if (dst->isContainedLclVar())
+ if (dst->isLclVarUsedFromMemory())
{
assert(dst->IsRegOptional());
lclVar = dst->AsLclVar();
@@ -2861,12 +2851,12 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find contained spill tmp if any
TempDsc* tmpDsc = nullptr;
- if (src->isContainedSpillTemp())
+ if (src->isUsedFromSpillTemp())
{
assert(src->IsRegOptional());
tmpDsc = codeGen->getSpillTempDsc(src);
}
- else if (dst->isContainedSpillTemp())
+ else if (dst->isUsedFromSpillTemp())
{
assert(dst->IsRegOptional());
tmpDsc = codeGen->getSpillTempDsc(dst);
@@ -2952,7 +2942,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
if (varNum != BAD_VAR_NUM || tmpDsc != nullptr)
{
// Is the memory op in the source position?
- if (src->isContainedMemoryOp())
+ if (src->isUsedFromMemory())
{
if (instrHasImplicitRegPairDest(ins))
{
@@ -3997,16 +3987,14 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f
code_t code = insCodeMI(ins);
UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
-#ifdef _TARGET_AMD64_
// Vex prefix
sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
// REX prefix, if not already included in "code"
- if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0)
+ if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
{
sz += emitGetRexPrefixSize(ins);
}
-#endif // _TARGET_AMD64_
id->idAddr()->iiaFieldHnd = fldHnd;
id->idCodeSize(sz);
@@ -8055,10 +8043,7 @@ DONE:
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -8450,10 +8435,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -8883,10 +8865,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -9428,10 +9407,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -10832,6 +10808,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutputByte(dst, emitGetInsSC(id));
sz = emitSizeOfInsDsc(id);
+
+ // Kill any GC ref in the destination register if necessary.
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
break;
/********************************************************************/
@@ -11202,9 +11184,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
assert(sz == emitSizeOfInsDsc(id));
#if !FEATURE_FIXED_OUT_ARGS
+ bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
+
+#if FEATURE_EH_FUNCLETS
+ updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
+#endif // FEATURE_EH_FUNCLETS
// Make sure we keep the current stack level up to date
- if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
+ if (updateStackLevel)
{
switch (ins)
{
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 98256cd..9c435e5 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -109,6 +109,16 @@ void SetUseSSE3_4(bool value)
}
bool Is4ByteSSE4Instruction(instruction ins);
+bool hasRexPrefix(code_t code)
+{
+#ifdef _TARGET_AMD64_
+ const code_t REX_PREFIX_MASK = 0xFF00000000LL;
+ return (code & REX_PREFIX_MASK) != 0;
+#else // !_TARGET_AMD64_
+ return false;
+#endif // !_TARGET_AMD64_
+}
+
#ifdef FEATURE_AVX_SUPPORT
// 3-byte VEX prefix starts with byte 0xC4
@@ -150,6 +160,26 @@ void SetUseAVX(bool value)
useAVXEncodings = value;
}
+bool containsAVXInstruction = false;
+bool ContainsAVX()
+{
+ return containsAVXInstruction;
+}
+void SetContainsAVX(bool value)
+{
+ containsAVXInstruction = value;
+}
+
+bool contains256bitAVXInstruction = false;
+bool Contains256bitAVX()
+{
+ return contains256bitAVXInstruction;
+}
+void SetContains256bitAVX(bool value)
+{
+ contains256bitAVXInstruction = value;
+}
+
bool IsThreeOperandBinaryAVXInstruction(instruction ins);
bool IsThreeOperandMoveAVXInstruction(instruction ins);
bool IsThreeOperandAVXInstruction(instruction ins)
@@ -158,7 +188,15 @@ bool IsThreeOperandAVXInstruction(instruction ins)
}
bool Is4ByteAVXInstruction(instruction ins);
#else // !FEATURE_AVX_SUPPORT
-bool UseAVX()
+bool UseAVX()
+{
+ return false;
+}
+bool ContainsAVX()
+{
+ return false;
+}
+bool Contains256bitAVX()
{
return false;
}
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 441569c..50318b0 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -8550,8 +8550,12 @@ void Compiler::fgAddInternal()
GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs)
{
GenTreeStmt* stmt = gtNewStmt(tree, offs);
- gtSetStmtInfo(stmt);
- fgSetStmtSeq(stmt);
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
#if DEBUG
if (block != nullptr)
@@ -11654,6 +11658,7 @@ DONE:
void Compiler::fgClearFinallyTargetBit(BasicBlock* block)
{
+ assert(fgComputePredsDone);
assert((block->bbFlags & BBF_FINALLY_TARGET) != 0);
for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
@@ -12946,6 +12951,12 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
optimizeJump = false;
}
+ // Don't optimize a jump to a cloned finally
+ if (bDest->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ optimizeJump = false;
+ }
+
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// Don't optimize a jump to a finally target. For BB1->BB2->BB3, where
// BB2 is a finally target, if we changed BB1 to jump directly to BB3,
@@ -13747,7 +13758,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi
{
assert(block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS);
assert(block->bbJumpDest == bNext);
- assert(block->bbNext = bNext);
+ assert(block->bbNext == bNext);
assert(block->bbPrev == bPrev);
if (block->bbJumpKind == BBJ_ALWAYS)
@@ -17782,7 +17793,7 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
// Special handling for GT_LIST
if (tree->OperGet() == GT_LIST)
@@ -18004,8 +18015,8 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
// Evaluate the trees left to right
- fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
fgSetTreeSeqHelper(tree->gtBoundsChk.gtIndex, isLIR);
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
break;
case GT_STORE_DYN_BLK:
@@ -20318,7 +20329,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
else if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
// During GS work, we make shadow copies for params.
// In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
@@ -21970,6 +21981,13 @@ _Done:
compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+#ifdef FEATURE_SIMD
+ if (InlineeCompiler->usesSIMDTypes())
+ {
+ setUsesSIMDTypes(true);
+ }
+#endif // FEATURE_SIMD
+
// Update unmanaged call count
info.compCallUnmanaged += InlineeCompiler->info.compCallUnmanaged;
@@ -22471,3 +22489,1770 @@ void Compiler::fgLclFldAssign(unsigned lclNum)
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
}
}
+
+//------------------------------------------------------------------------
+// fgRemoveEmptyFinally: Remove try/finallys where the finally is empty
+//
+// Notes:
+// Removes all try/finallys in the method with empty finallys.
+// These typically arise from inlining empty Dispose methods.
+//
+// Converts callfinally to a jump to the finally continuation.
+// Removes the finally, and reparents all blocks in the try to the
+// enclosing try or method region.
+//
+// Currently limited to trivially empty finallys: those with one basic
+// block containing only a single RETFILT statement. It is possible but
+// not likely that more complex-looking finallys will eventually become
+// empty (from say subsequent optimization). An SPMI run with
+// just the "detection" part of this phase run after optimization
+// found only one example where a new empty finally was detected.
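+//
+// As a hedged illustration (block numbers are hypothetical, not taken
+// from a real dump), the transformation is roughly:
+//
+//   before:  BB01 [try]  -> BB02 BBJ_CALLFINALLY -> BB10
+//            BB03 BBJ_ALWAYS (paired leave)      -> BB20
+//            BB10 [finally] containing only GT_RETFILT
+//            BB20 continuation
+//
+//   after:   BB01 -> BB02 BBJ_ALWAYS -> BB20
+//            BB03, BB10, and the EH table entry are removed; the former
+//            try blocks are reparented to the enclosing region.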
+
+void Compiler::fgRemoveEmptyFinally()
+{
+ JITDUMP("\n*************** In fgRemoveEmptyFinally()\n");
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, nothing to remove.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no removal.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no removal.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEmptyFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Look for finallys or faults that are empty.
+ unsigned finallyCount = 0;
+ unsigned emptyCount = 0;
+ unsigned XTnum = 0;
+ while (XTnum < compHndBBtabCount)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Check if this is a try/finally. We could also look for empty
+ // try/fault but presumably those are rare.
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ finallyCount++;
+
+ // Look at blocks involved.
+ BasicBlock* const firstBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastBlock = HBtab->ebdHndLast;
+
+ // Limit for now to finallys that are single blocks.
+ if (firstBlock != lastBlock)
+ {
+ JITDUMP("EH#%u finally has multiple basic blocks; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ // Limit for now to finallys that contain only a GT_RETFILT.
+ bool isEmpty = true;
+
+ for (GenTreeStmt* stmt = firstBlock->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtExpr = stmt->gtStmtExpr;
+
+ if (stmtExpr->gtOper != GT_RETFILT)
+ {
+ isEmpty = false;
+ break;
+ }
+ }
+
+ if (!isEmpty)
+ {
+ JITDUMP("EH#%u finally is not empty; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ JITDUMP("EH#%u has empty finally, removing the region.\n", XTnum);
+
+ // Find all the call finallys that invoke this finally,
+ // and modify them to jump to the return point.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ BasicBlock* currentBlock = firstCallFinallyRangeBlock;
+
+ while (currentBlock != endCallFinallyRangeBlock)
+ {
+ BasicBlock* nextBlock = currentBlock->bbNext;
+
+ if ((currentBlock->bbJumpKind == BBJ_CALLFINALLY) && (currentBlock->bbJumpDest == firstBlock))
+ {
+ // Retarget the call finally to jump to the return
+ // point.
+ //
+ // We don't expect to see retless finallys here, since
+ // the finally is empty.
+ noway_assert(currentBlock->isBBCallAlwaysPair());
+
+ BasicBlock* const leaveBlock = currentBlock->bbNext;
+ BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest;
+
+ noway_assert(leaveBlock->bbJumpKind == BBJ_ALWAYS);
+
+ currentBlock->bbJumpDest = postTryFinallyBlock;
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+
+ // Ref count updates.
+ fgAddRefPred(postTryFinallyBlock, currentBlock);
+ // fgRemoveRefPred(firstBlock, currentBlock);
+
+ // Delete the leave block, which should be marked as
+ // keep always.
+ assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ nextBlock = leaveBlock->bbNext;
+
+ leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+ fgRemoveBlock(leaveBlock, true);
+
+ // Cleanup the postTryFinallyBlock
+ fgCleanupContinuation(postTryFinallyBlock);
+
+ // Make sure iteration isn't going off the deep end.
+ assert(leaveBlock != endCallFinallyRangeBlock);
+ }
+
+ currentBlock = nextBlock;
+ }
+
+ // Handler block should now be unreferenced, since the only
+ // explicit references to it were in call finallys.
+ firstBlock->bbRefs = 0;
+
+ // Remove the handler block.
+ const bool unreachable = true;
+ firstBlock->bbFlags &= ~BBF_DONT_REMOVE;
+ fgRemoveBlock(firstBlock, unreachable);
+
+ // Find enclosing try region for the try, if any, and update
+ // the try region. Note the handler region (if any) won't
+ // change.
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() == XTnum);
+
+ for (BasicBlock* block = firstTryBlock; block != nullptr; block = block->bbNext)
+ {
+ // Look for blocks directly contained in this try, and
+ // update the try region appropriately.
+ //
+ // Try region for blocks transitively contained (say in a
+ // child try) will get updated by the subsequent call to
+ // fgRemoveEHTableEntry.
+ if (block->getTryIndex() == XTnum)
+ {
+ if (firstBlock->hasTryIndex())
+ {
+ block->setTryIndex(firstBlock->getTryIndex());
+ }
+ else
+ {
+ block->clearTryIndex();
+ }
+ }
+
+ if (block == firstTryBlock)
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) != 0);
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ if (block == lastTryBlock)
+ {
+ break;
+ }
+ }
+
+ // Remove the try-finally EH region. This will compact the EH table
+ // so XTnum now points at the next entry.
+ fgRemoveEHTableEntry(XTnum);
+
+ emptyCount++;
+ }
+
+ if (emptyCount > 0)
+ {
+ JITDUMP("fgRemoveEmptyFinally() removed %u try-finally clauses from %u finallys\n", emptyCount, finallyCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEmptyFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+
+#endif // DEBUG
+ }
+}
+
+//------------------------------------------------------------------------
+// fgRemoveEmptyTry: Optimize try/finallys where the try is empty
+//
+// Notes:
+// In runtimes where thread abort is not possible, `try {} finally {S}`
+// can be optimized to simply `S`. This method looks for such
+// cases and removes the try-finally from the EH table, making
+// suitable flow, block flag, statement, and region updates.
+//
+// This optimization is not legal in runtimes that support thread
+// abort because those runtimes ensure that a finally is completely
+// executed before continuing to process the thread abort. With
+// this optimization, the code block `S` can lose its special
+// within-finally status, and so complete execution is no longer
+// guaranteed.
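+//
+// As a hedged illustration (block numbers hypothetical, callfinally-thunk
+// model assumed), `try {} finally { S }` transforms roughly as:
+//
+//   before:  BB01 [try, empty] BBJ_ALWAYS -> BB02
+//            BB02 BBJ_CALLFINALLY -> BB10, BB03 BBJ_ALWAYS -> BB20
+//            BB10..BB1x [finally] S ... GT_RETFILT
+//            BB20 continuation
+//
+//   after:   BB01 BBJ_ALWAYS -> BB02, BB02 BBJ_ALWAYS -> BB10
+//            BB10..BB1x are ordinary blocks whose exits jump to BB20;
+//            the EH table entry is removed.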
+
+void Compiler::fgRemoveEmptyTry()
+{
+ JITDUMP("\n*************** In fgRemoveEmptyTry()\n");
+
+#ifdef FEATURE_CORECLR
+ bool enableRemoveEmptyTry = true;
+#else
+ // Code in a finally gets special treatment in the presence of
+ // thread abort.
+ bool enableRemoveEmptyTry = false;
+#endif // FEATURE_CORECLR
+
+#ifdef DEBUG
+ // Allow override to enable/disable.
+ enableRemoveEmptyTry = (JitConfig.JitEnableRemoveEmptyTry() == 1);
+#endif // DEBUG
+
+ if (!enableRemoveEmptyTry)
+ {
+ JITDUMP("Empty try removal disabled.\n");
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, nothing to remove.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no removal.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no removal.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEmptyTry()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Look for try-finallys where the try is empty.
+ unsigned emptyCount = 0;
+ unsigned XTnum = 0;
+ while (XTnum < compHndBBtabCount)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Check if this is a try/finally. We could also look for empty
+ // try/fault but presumably those are rare.
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ // Examine the try region
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ BasicBlock* const firstHandlerBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastHandlerBlock = HBtab->ebdHndLast;
+ BasicBlock* const endHandlerBlock = lastHandlerBlock->bbNext;
+
+ assert(firstTryBlock->getTryIndex() == XTnum);
+
+ // Limit for now to trys that contain only a callfinally pair
+ // or branch to same.
+ if (!firstTryBlock->isEmpty())
+ {
+ JITDUMP("EH#%u first try block BB%02u not empty; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Look for blocks that are always jumps to a call finally
+ // pair that targets the finally
+ if (firstTryBlock->bbJumpKind != BBJ_ALWAYS)
+ {
+ JITDUMP("EH#%u first try block BB%02u not jump to a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ BasicBlock* const callFinally = firstTryBlock->bbJumpDest;
+
+ // Look for call always pair. Note this will also disqualify
+ // empty try removal in cases where the finally doesn't
+ // return.
+ if (!callFinally->isBBCallAlwaysPair() || (callFinally->bbJumpDest != firstHandlerBlock))
+ {
+ JITDUMP("EH#%u first try block BB%02u always jumps but not to a callfinally; skipping.\n", XTnum,
+ firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ // Try itself must be a single block.
+ if (firstTryBlock != lastTryBlock)
+ {
+ JITDUMP("EH#%u first try block BB%02u not only block in try; skipping.\n", XTnum,
+ firstTryBlock->bbNext->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#else
+ // Look for call always pair within the try itself. Note this
+ // will also disqualify empty try removal in cases where the
+ // finally doesn't return.
+ if (!firstTryBlock->isBBCallAlwaysPair() || (firstTryBlock->bbJumpDest != firstHandlerBlock))
+ {
+ JITDUMP("EH#%u first try block BB%02u not a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ BasicBlock* const callFinally = firstTryBlock;
+
+ // Try must be a callalways pair of blocks.
+ if (firstTryBlock->bbNext != lastTryBlock)
+ {
+ JITDUMP("EH#%u block BB%02u not last block in try; skipping.\n", XTnum, firstTryBlock->bbNext->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ JITDUMP("EH#%u has empty try, removing the try region and promoting the finally.\n", XTnum);
+
+ // There should be just one callfinally that invokes this
+ // finally, the one we found above. Verify this.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ bool verifiedSingleCallfinally = true;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ for (BasicBlock* block = firstCallFinallyRangeBlock; block != endCallFinallyRangeBlock; block = block->bbNext)
+ {
+ if ((block->bbJumpKind == BBJ_CALLFINALLY) && (block->bbJumpDest == firstHandlerBlock))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ if (block != callFinally)
+ {
+ JITDUMP("EH#%u found unexpected callfinally BB%02u; skipping.\n");
+ verifiedSingleCallfinally = false;
+ break;
+ }
+
+ block = block->bbNext;
+ }
+ }
+
+ if (!verifiedSingleCallfinally)
+ {
+ JITDUMP("EH#%u -- unexpectedly -- has multiple callfinallys; skipping.\n");
+ XTnum++;
+ assert(verifiedSingleCallfinally);
+ continue;
+ }
+
+ // Time to optimize.
+ //
+ // (1) Convert the callfinally to a normal jump to the handler
+ callFinally->bbJumpKind = BBJ_ALWAYS;
+
+ // Identify the leave block and the continuation
+ BasicBlock* const leave = callFinally->bbNext;
+ BasicBlock* const continuation = leave->bbJumpDest;
+
+ // (2) Cleanup the leave so it can be deleted by subsequent opts
+ assert((leave->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ leave->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+
+ // (3) Cleanup the continuation
+ fgCleanupContinuation(continuation);
+
+ // (4) Find enclosing try region for the try, if any, and
+ // update the try region for the blocks in the try. Note the
+ // handler region (if any) won't change.
+ //
+ // Kind of overkill to loop here, but hey.
+ for (BasicBlock* block = firstTryBlock; block != nullptr; block = block->bbNext)
+ {
+ // Look for blocks directly contained in this try, and
+ // update the try region appropriately.
+ //
+ // The try region for blocks transitively contained (say in a
+ // child try) will get updated by the subsequent call to
+ // fgRemoveEHTableEntry.
+ if (block->getTryIndex() == XTnum)
+ {
+ if (firstHandlerBlock->hasTryIndex())
+ {
+ block->setTryIndex(firstHandlerBlock->getTryIndex());
+ }
+ else
+ {
+ block->clearTryIndex();
+ }
+ }
+
+ if (block == firstTryBlock)
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) != 0);
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ if (block == lastTryBlock)
+ {
+ break;
+ }
+ }
+
+ // (5) Update the directly contained handler blocks' handler index.
+ // Handler index of any nested blocks will update when we
+ // remove the EH table entry. Change handler exits to jump to
+ // the continuation. Clear catch type on handler entry.
+ for (BasicBlock* block = firstHandlerBlock; block != endHandlerBlock; block = block->bbNext)
+ {
+ if (block == firstHandlerBlock)
+ {
+ block->bbCatchTyp = BBCT_NONE;
+ }
+
+ if (block->getHndIndex() == XTnum)
+ {
+ if (firstTryBlock->hasHndIndex())
+ {
+ block->setHndIndex(firstTryBlock->getHndIndex());
+ }
+ else
+ {
+ block->clearHndIndex();
+ }
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ GenTreeStmt* finallyRet = block->lastStmt();
+ GenTreePtr finallyRetExpr = finallyRet->gtStmtExpr;
+ assert(finallyRetExpr->gtOper == GT_RETFILT);
+ fgRemoveStmt(block, finallyRet);
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = continuation;
+ }
+ }
+ }
+
+ // (6) Remove the try-finally EH region. This will compact the
+ // EH table so XTnum now points at the next entry and will update
+ // the EH region indices of any nested EH in the (former) handler.
+ fgRemoveEHTableEntry(XTnum);
+
+ // Another one bites the dust...
+ emptyCount++;
+ }
+
+ if (emptyCount > 0)
+ {
+ JITDUMP("fgRemoveEmptyTry() optimized %u empty-try try-finally clauses\n", emptyCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEmptyTry()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+
+#endif // DEBUG
+ }
+}
+
+//------------------------------------------------------------------------
+// fgCloneFinally: Optimize normal exit path from a try/finally
+//
+// Notes:
+// Handles finallys that are not enclosed by or enclosing other
+// handler regions.
+//
+// Converts the "normal exit" callfinally to a jump to a cloned copy
+// of the finally, which in turn jumps to the finally continuation.
+//
+// If all callfinallys for a given finally are converted to jump to
+// the clone, the try-finally is modified into a try-fault,
+// distinguishable from organic try-faults by handler type
+// EH_HANDLER_FAULT_WAS_FINALLY vs the organic EH_HANDLER_FAULT.
+//
+// Does not yet handle thread abort. The open issues here are how
+// to maintain the proper description of the cloned finally blocks
+// as a handler (for thread abort purposes), how to prevent code
+// motion in or out of these blocks, and how to report this cloned
+// handler to the runtime. Some building blocks for thread abort
+// exist (see below), but more work is needed.
+//
+// The first and last blocks of the cloned finally are marked with
+// BBF_CLONED_FINALLY_BEGIN and BBF_CLONED_FINALLY_END. However
+// these markers currently can get lost during subsequent
+// optimizations.
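+//
+// As a hedged illustration (block numbers hypothetical): if BB02/BB03 is
+// the normal-exit callfinally/always pair for finally BB10..BB1x, with
+// continuation BB20, cloning creates BB10'..BB1x' in the try's parent
+// region; BB02 is retargeted as BBJ_ALWAYS -> BB10', the clone's
+// GT_RETFILT exit becomes BBJ_ALWAYS -> BB20, and if every callfinally
+// was retargeted the original finally becomes a fault
+// (EH_HANDLER_FAULT_WAS_FINALLY).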
+
+void Compiler::fgCloneFinally()
+{
+ JITDUMP("\n*************** In fgCloneFinally()\n");
+
+#ifdef FEATURE_CORECLR
+ bool enableCloning = true;
+#else
+ // Finally cloning currently doesn't provide sufficient protection
+ // for the cloned code in the presence of thread abort.
+ bool enableCloning = false;
+#endif // FEATURE_CORECLR
+
+#ifdef DEBUG
+ // Allow override to enable/disable.
+ enableCloning = (JitConfig.JitEnableFinallyCloning() == 1);
+#endif // DEBUG
+
+ if (!enableCloning)
+ {
+ JITDUMP("Finally cloning disabled.\n");
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, no cloning.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no cloning.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no cloning.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgCloneFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ // Verify try-finally exits look good before we start.
+ fgDebugCheckTryFinallyExits();
+
+#endif // DEBUG
+
+ // Look for finallys that are not contained within other handlers,
+ // and which do not themselves contain EH.
+ //
+ // Note these cases potentially could be handled, but are less
+ // obviously profitable and require modification of the handler
+ // table.
+ unsigned XTnum = 0;
+ EHblkDsc* HBtab = compHndBBtab;
+ unsigned cloneCount = 0;
+ for (; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Check if this is a try/finally
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Check if enclosed by another handler.
+ const unsigned enclosingHandlerRegion = ehGetEnclosingHndIndex(XTnum);
+
+ if (enclosingHandlerRegion != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ JITDUMP("EH#%u is enclosed by handler EH#%u; skipping.\n", XTnum, enclosingHandlerRegion);
+ continue;
+ }
+
+ bool containsEH = false;
+ unsigned exampleEnclosedHandlerRegion = 0;
+
+ // Only need to look at lower numbered regions because the
+ // handler table is ordered by nesting.
+ for (unsigned i = 0; i < XTnum; i++)
+ {
+ if (ehGetEnclosingHndIndex(i) == XTnum)
+ {
+ exampleEnclosedHandlerRegion = i;
+ containsEH = true;
+ break;
+ }
+ }
+
+ if (containsEH)
+ {
+ JITDUMP("Finally for EH#%u encloses handler EH#%u; skipping.\n", XTnum, exampleEnclosedHandlerRegion);
+ continue;
+ }
+
+ // Look at blocks involved.
+ BasicBlock* const firstBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastBlock = HBtab->ebdHndLast;
+ assert(firstBlock != nullptr);
+ assert(lastBlock != nullptr);
+ BasicBlock* nextBlock = lastBlock->bbNext;
+ unsigned regionBBCount = 0;
+ unsigned regionStmtCount = 0;
+ bool hasFinallyRet = false;
+ bool isAllRare = true;
+ bool hasSwitch = false;
+
+ for (const BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ hasSwitch = true;
+ break;
+ }
+
+ regionBBCount++;
+
+ // Should we compute statement cost here, or is it
+ // premature...? For now just count statements I guess.
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ regionStmtCount++;
+ }
+
+ hasFinallyRet = hasFinallyRet || (block->bbJumpKind == BBJ_EHFINALLYRET);
+ isAllRare = isAllRare && block->isRunRarely();
+ }
+
+ // Skip cloning if the finally has a switch.
+ if (hasSwitch)
+ {
+ JITDUMP("Finally in EH#%u has a switch; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Skip cloning if the finally must throw.
+ if (!hasFinallyRet)
+ {
+ JITDUMP("Finally in EH#%u does not return; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Skip cloning if the finally is rarely run code.
+ if (isAllRare)
+ {
+ JITDUMP("Finally in EH#%u is run rarely; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Empirical studies from CoreCLR and CoreFX show that less
+ // than 1% of finally regions have more than 15
+ // statements. So, to avoid potentially excessive code growth,
+ // only clone finallys that have 15 or fewer statements.
+ const unsigned stmtCountLimit = 15;
+ if (regionStmtCount > stmtCountLimit)
+ {
+ JITDUMP("Finally in EH#%u has %u statements, limit is %u; skipping.\n", XTnum, regionStmtCount,
+ stmtCountLimit);
+ continue;
+ }
+
+ JITDUMP("EH#%u is a candidate for finally cloning:"
+ " %u blocks, %u statements\n",
+ XTnum, regionBBCount, regionStmtCount);
+
+ // Walk the try region backwards looking for the last block
+ // that transfers control to a callfinally.
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() == XTnum);
+ assert(lastTryBlock->getTryIndex() == XTnum);
+ BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev;
+
+ BasicBlock* normalCallFinallyBlock = nullptr;
+ BasicBlock* normalCallFinallyReturn = nullptr;
+ BasicBlock* cloneInsertAfter = HBtab->ebdTryLast;
+ bool tryToRelocateCallFinally = false;
+
+ for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev)
+ {
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ // Look for blocks that are always jumps to a call finally
+ // pair that targets our finally.
+ if (block->bbJumpKind != BBJ_ALWAYS)
+ {
+ continue;
+ }
+
+ BasicBlock* const jumpDest = block->bbJumpDest;
+
+ if (!jumpDest->isBBCallAlwaysPair() || (jumpDest->bbJumpDest != firstBlock))
+ {
+ continue;
+ }
+#else
+ // Look for call finally pair directly within the try
+ if (!block->isBBCallAlwaysPair() || (block->bbJumpDest != firstBlock))
+ {
+ continue;
+ }
+
+ BasicBlock* const jumpDest = block;
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Found our block.
+ BasicBlock* const finallyReturnBlock = jumpDest->bbNext;
+ BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest;
+
+ normalCallFinallyBlock = jumpDest;
+ normalCallFinallyReturn = postTryFinallyBlock;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ // When there are callfinally thunks, we don't expect to see the
+ // callfinally within a handler region either.
+ assert(!jumpDest->hasHndIndex());
+
+ // Update the clone insertion point to just after the
+ // call always pair.
+ cloneInsertAfter = finallyReturnBlock;
+
+ // We will consider moving the callfinally so we can fall
+ // through from the try into the clone.
+ tryToRelocateCallFinally = true;
+
+ JITDUMP("Chose path to clone: try block BB%02u jumps to callfinally at BB%02u;"
+ " the call returns to BB%02u which jumps to BB%02u\n",
+ block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
+#else
+ JITDUMP("Chose path to clone: try block BB%02u is a callfinally;"
+ " the call returns to BB%02u which jumps to BB%02u\n",
+ block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ break;
+ }
+
+ // If there is no call to the finally, don't clone.
+ if (normalCallFinallyBlock == nullptr)
+ {
+ JITDUMP("EH#%u: no calls from the try to the finally, skipping.\n", XTnum);
+ continue;
+ }
+
+ JITDUMP("Will update callfinally block BB%02u to jump to the clone;"
+ " clone will jump to BB%02u\n",
+ normalCallFinallyBlock->bbNum, normalCallFinallyReturn->bbNum);
+
+ // If there are multiple callfinallys and we're in the
+ // callfinally thunk model, all the callfinallys are placed
+ // just outside the try region. We'd like our chosen
+ // callfinally to come first after the try, so we can fall out of the try
+ // into the clone.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ if (tryToRelocateCallFinally)
+ {
+ BasicBlock* firstCallFinallyBlock = nullptr;
+
+ for (BasicBlock* block = firstCallFinallyRangeBlock; block != endCallFinallyRangeBlock;
+ block = block->bbNext)
+ {
+ if (block->isBBCallAlwaysPair())
+ {
+ if (block->bbJumpDest == firstBlock)
+ {
+ firstCallFinallyBlock = block;
+ break;
+ }
+ }
+ }
+
+ // We better have found at least one call finally.
+ assert(firstCallFinallyBlock != nullptr);
+
+ // If there is more than one callfinally, move the one we are
+ // going to retarget to be first in the callfinally range.
+ if (firstCallFinallyBlock != normalCallFinallyBlock)
+ {
+ JITDUMP("Moving callfinally BB%02u to be first in line, before BB%02u\n", normalCallFinallyBlock->bbNum,
+ firstCallFinallyBlock->bbNum);
+
+ BasicBlock* const firstToMove = normalCallFinallyBlock;
+ BasicBlock* const lastToMove = normalCallFinallyBlock->bbNext;
+ BasicBlock* const placeToMoveAfter = firstCallFinallyBlock->bbPrev;
+
+ fgUnlinkRange(firstToMove, lastToMove);
+ fgMoveBlocksAfter(firstToMove, lastToMove, placeToMoveAfter);
+
+#ifdef DEBUG
+ // Sanity checks
+ fgDebugCheckBBlist(false, false);
+ fgVerifyHandlerTab();
+#endif // DEBUG
+
+ assert(nextBlock == lastBlock->bbNext);
+
+ // Update where the callfinally range begins, since we might
+ // have altered this with callfinally rearrangement, and/or
+ // the range begin might have been pretty loose to begin with.
+ firstCallFinallyRangeBlock = normalCallFinallyBlock;
+ }
+ }
+
+ // Clone the finally and retarget the normal return path and
+ // any other path that happens to share that same return
+ // point. For instance a construct like:
+ //
+ // try { } catch { } finally { }
+ //
+ // will have two call finally blocks, one for the normal exit
+ // from the try, and the other for the exit from the
+ // catch. They'll both pass the same return point which is the
+ // statement after the finally, so they can share the clone.
+ //
+ // Clone the finally body, and splice it into the flow graph
+ // within the parent region of the try.
+ const unsigned finallyTryIndex = firstBlock->bbTryIndex;
+ BasicBlock* insertAfter = nullptr;
+ BlockToBlockMap blockMap(getAllocator());
+ bool clonedOk = true;
+ unsigned cloneBBCount = 0;
+
+ for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ BasicBlock* newBlock;
+
+ if (block == firstBlock)
+ {
+ // Put the first cloned finally block into the appropriate
+ // region, somewhere within or after the range of
+ // callfinallys, depending on the EH implementation.
+ const unsigned hndIndex = 0;
+ BasicBlock* const nearBlk = cloneInsertAfter;
+ newBlock = fgNewBBinRegion(block->bbJumpKind, finallyTryIndex, hndIndex, nearBlk);
+
+ // If the clone ends up just after the finally, adjust
+ // the stopping point for finally traversal.
+ if (newBlock->bbNext == nextBlock)
+ {
+ assert(newBlock->bbPrev == lastBlock);
+ nextBlock = newBlock;
+ }
+ }
+ else
+ {
+ // Put subsequent blocks in the same region...
+ const bool extendRegion = true;
+ newBlock = fgNewBBafter(block->bbJumpKind, insertAfter, extendRegion);
+ }
+
+ cloneBBCount++;
+ assert(cloneBBCount <= regionBBCount);
+
+ insertAfter = newBlock;
+ blockMap.Set(block, newBlock);
+
+ clonedOk = BasicBlock::CloneBlockState(this, newBlock, block);
+
+ if (!clonedOk)
+ {
+ break;
+ }
+
+ // Update block flags. Note a block can be both first and last.
+ if (block == firstBlock)
+ {
+ // Mark the block as the start of the cloned finally.
+ newBlock->bbFlags |= BBF_CLONED_FINALLY_BEGIN;
+ }
+
+ if (block == lastBlock)
+ {
+ // Mark the block as the end of the cloned finally.
+ newBlock->bbFlags |= BBF_CLONED_FINALLY_END;
+ }
+
+ // Make sure clone block state hasn't munged the try region.
+ assert(newBlock->bbTryIndex == finallyTryIndex);
+
+ // Cloned handler block is no longer within the handler.
+ newBlock->clearHndIndex();
+
+ // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
+ assert(newBlock->bbJumpDest == nullptr);
+ }
+
+ if (!clonedOk)
+ {
+ // TODO: cleanup the partial clone?
+ JITDUMP("Unable to clone the finally; skipping.\n");
+ continue;
+ }
+
+ // We should have cloned all the finally region blocks.
+ assert(cloneBBCount == regionBBCount);
+
+ JITDUMP("Cloned finally blocks are: BB%2u ... BB%2u\n", blockMap[firstBlock]->bbNum,
+ blockMap[lastBlock]->bbNum);
+
+ // Redirect any branches within the newly-cloned
+ // finally, and any finally returns to jump to the return
+ // point.
+ for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ BasicBlock* newBlock = blockMap[block];
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ GenTreeStmt* finallyRet = newBlock->lastStmt();
+ GenTreePtr finallyRetExpr = finallyRet->gtStmtExpr;
+ assert(finallyRetExpr->gtOper == GT_RETFILT);
+ fgRemoveStmt(newBlock, finallyRet);
+ newBlock->bbJumpKind = BBJ_ALWAYS;
+ newBlock->bbJumpDest = normalCallFinallyReturn;
+
+ fgAddRefPred(normalCallFinallyReturn, newBlock);
+ }
+ else
+ {
+ optCopyBlkDest(block, newBlock);
+ optRedirectBlock(newBlock, &blockMap);
+ }
+ }
+
+ // Modify the targeting call finallys to branch to the cloned
+ // finally. Make a note if we see some calls that can't be
+ // retargeted (since they want to return to other places).
+ BasicBlock* const firstCloneBlock = blockMap[firstBlock];
+ bool retargetedAllCalls = true;
+ BasicBlock* currentBlock = firstCallFinallyRangeBlock;
+
+ while (currentBlock != endCallFinallyRangeBlock)
+ {
+ BasicBlock* nextBlockToScan = currentBlock->bbNext;
+
+ if (currentBlock->isBBCallAlwaysPair())
+ {
+ if (currentBlock->bbJumpDest == firstBlock)
+ {
+ BasicBlock* const leaveBlock = currentBlock->bbNext;
+ BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest;
+
+ // Note we must retarget all callfinallies that have this
+ // continuation, or we can't clean up the continuation
+ // block properly below, since it will be reachable both
+ // by the cloned finally and by the called finally.
+ if (postTryFinallyBlock == normalCallFinallyReturn)
+ {
+ // This call returns to the expected spot, so
+ // retarget it to branch to the clone.
+ currentBlock->bbJumpDest = firstCloneBlock;
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+
+ // Ref count updates.
+ fgAddRefPred(firstCloneBlock, currentBlock);
+ // fgRemoveRefPred(firstBlock, currentBlock);
+
+ // Delete the leave block, which should be marked as
+ // keep always.
+ assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ nextBlock = leaveBlock->bbNext;
+
+ leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+ fgRemoveBlock(leaveBlock, true);
+
+ // Make sure iteration isn't going off the deep end.
+ assert(leaveBlock != endCallFinallyRangeBlock);
+ }
+ else
+ {
+ // We can't retarget this call since it
+ // returns somewhere else.
+ retargetedAllCalls = false;
+ }
+ }
+ }
+
+ currentBlock = nextBlockToScan;
+ }
+
+ // If we retargeted all calls, modify the EH descriptor to be
+ // try-fault instead of try-finally, and change the non-cloned
+ // finally's catch type to fault.
+ if (retargetedAllCalls)
+ {
+ JITDUMP("All callfinallys retargeted; changing finally to fault.\n");
+ HBtab->ebdHandlerType = EH_HANDLER_FAULT_WAS_FINALLY;
+ firstBlock->bbCatchTyp = BBCT_FAULT;
+ }
+ else
+ {
+ JITDUMP("Some callfinallys *not* retargeted, so region must remain as a finally.\n");
+ }
+
+ // Modify first block of cloned finally to be a "normal" block.
+ BasicBlock* firstClonedBlock = blockMap[firstBlock];
+ firstClonedBlock->bbCatchTyp = BBCT_NONE;
+
+ // Cleanup the continuation
+ fgCleanupContinuation(normalCallFinallyReturn);
+
+ // Todo -- mark cloned blocks as a cloned finally....
+
+ // Done!
+ JITDUMP("\nDone with EH#%u\n\n", XTnum);
+ cloneCount++;
+ }
+
+ if (cloneCount > 0)
+ {
+ JITDUMP("fgCloneFinally() cloned %u finally handlers\n", cloneCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgCloneFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+ fgDebugCheckTryFinallyExits();
+
+#endif // DEBUG
+ }
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgDebugCheckTryFinallyExits: validate normal flow from try-finally
+// or try-fault-was-finally.
+//
+// Notes:
+//
+// Normal control flow exiting the try block of a try-finally must
+// pass through the finally. This checker attempts to verify that by
+// looking at the control flow graph.
+//
+// Each path that exits the try of a try-finally (including try-finallys
+// that were optimized into try-faults by fgCloneFinally) should
+// thus either execute a callfinally to the associated finally or else
+// jump to a block with the BBF_CLONED_FINALLY_BEGIN flag set.
+//
+// Depending on when this check is done, there may also be an empty
+// block along the path.
+//
+// Depending on the model for invoking finallys, the callfinallies may
+// lie within the try region (callfinally thunks) or in the enclosing
+// region.
+
+void Compiler::fgDebugCheckTryFinallyExits()
+{
+ unsigned XTnum = 0;
+ EHblkDsc* HBtab = compHndBBtab;
+ unsigned cloneCount = 0;
+ bool allTryExitsValid = true;
+ for (; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ const EHHandlerType handlerType = HBtab->ebdHandlerType;
+ const bool isFinally = (handlerType == EH_HANDLER_FINALLY);
+ const bool wasFinally = (handlerType == EH_HANDLER_FAULT_WAS_FINALLY);
+
+ // Screen out regions that are or were not finallys.
+ if (!isFinally && !wasFinally)
+ {
+ continue;
+ }
+
+ // Walk blocks of the try, looking for normal control flow to
+ // an ancestor region.
+
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() <= XTnum);
+ assert(lastTryBlock->getTryIndex() <= XTnum);
+ BasicBlock* const afterTryBlock = lastTryBlock->bbNext;
+ BasicBlock* const finallyBlock = isFinally ? HBtab->ebdHndBeg : nullptr;
+
+ for (BasicBlock* block = firstTryBlock; block != afterTryBlock; block = block->bbNext)
+ {
+ // Only check the directly contained blocks.
+ assert(block->hasTryIndex());
+
+ if (block->getTryIndex() != XTnum)
+ {
+ continue;
+ }
+
+ // Look at each of the normal control flow possibilities.
+ const unsigned numSuccs = block->NumSucc();
+
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ BasicBlock* const succBlock = block->GetSucc(i);
+
+ if (succBlock->hasTryIndex() && succBlock->getTryIndex() <= XTnum)
+ {
+ // Successor does not exit this try region.
+ continue;
+ }
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // When there are callfinally thunks, callfinallies
+ // logically "belong" to a child region and the exit
+ // path validity will be checked when looking at the
+ // try blocks in that region.
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ continue;
+ }
+
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Now we know block lies directly within the try of a
+ // try-finally, and succBlock is in an enclosing
+ // region (possibly the method region). So this path
+ // represents flow out of the try and should be
+ // checked.
+ //
+ // There are various ways control can properly leave a
+ // try-finally (or try-fault-was-finally):
+ //
+ // (a1) via a jump to a callfinally (only for finallys, only for call finally thunks)
+ // (a2) via a callfinally (only for finallys, only for !call finally thunks)
+ // (b) via a jump to a begin finally clone block
+ // (c) via a jump to an empty block to (b)
+ // (d) via a fallthrough to an empty block to (b)
+ // (e) via the always half of a callfinally pair
+ // (f) via an always jump clonefinally exit
+ bool isCallToFinally = false;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (succBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // case (a1)
+ isCallToFinally = isFinally && (succBlock->bbJumpDest == finallyBlock);
+ }
+#else
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // case (a2)
+ isCallToFinally = isFinally && (block->bbJumpDest == finallyBlock);
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ bool isJumpToClonedFinally = false;
+
+ if (succBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ // case (b)
+ isJumpToClonedFinally = true;
+ }
+ else if (succBlock->bbJumpKind == BBJ_ALWAYS)
+ {
+ if (succBlock->isEmpty())
+ {
+ // case (c)
+ BasicBlock* const succSuccBlock = succBlock->bbJumpDest;
+
+ if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ isJumpToClonedFinally = true;
+ }
+ }
+ }
+ else if (succBlock->bbJumpKind == BBJ_NONE)
+ {
+ if (succBlock->isEmpty())
+ {
+ BasicBlock* const succSuccBlock = succBlock->bbNext;
+
+ // case (d)
+ if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ isJumpToClonedFinally = true;
+ }
+ }
+ }
+
+ bool isReturnFromFinally = false;
+
+ // Case (e). Ideally we'd have something stronger to
+ // check here -- eg that we are returning from a call
+ // to the right finally -- but there are odd cases
+ // like orphaned second halves of callfinally pairs
+ // that we need to tolerate.
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ isReturnFromFinally = true;
+ }
+
+ // Case (f)
+ if (block->bbFlags & BBF_CLONED_FINALLY_END)
+ {
+ isReturnFromFinally = true;
+ }
+
+ const bool thisExitValid = isCallToFinally || isJumpToClonedFinally || isReturnFromFinally;
+
+ if (!thisExitValid)
+ {
+                    JITDUMP("fgCheckTryFinallyExits: EH#%u exit via BB%02u -> BB%02u is invalid\n", XTnum, block->bbNum,
+ succBlock->bbNum);
+ }
+
+ allTryExitsValid = allTryExitsValid & thisExitValid;
+ }
+ }
+ }
+
+ if (!allTryExitsValid)
+ {
+ JITDUMP("fgCheckTryFinallyExits: method contains invalid try exit paths\n");
+ assert(allTryExitsValid);
+ }
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgCleanupContinuation: clean up a finally continuation after a
+// finally is removed or converted to normal control flow.
+//
+// Notes:
+// The continuation is the block targeted by the second half of
+// a callfinally/always pair.
+//
+// Used by finally cloning, empty try removal, and empty
+// finally removal.
+//
+// BBF_FINALLY_TARGET bbFlag is left unchanged by this method
+// since it cannot be incrementally updated. Proper updates happen
+// when fgUpdateFinallyTargetFlags runs after all finally optimizations.
+
+void Compiler::fgCleanupContinuation(BasicBlock* continuation)
+{
+ // The continuation may be a finalStep block.
+ // It is now a normal block, so clear the special keep
+ // always flag.
+ continuation->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+
+#if !FEATURE_EH_FUNCLETS
+    // Remove the GT_END_LFIN from the continuation.
+    // Note that we only expect to see one such statement.
+ bool foundEndLFin = false;
+ for (GenTreeStmt* stmt = continuation->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr->gtOper == GT_END_LFIN)
+ {
+ assert(!foundEndLFin);
+ fgRemoveStmt(continuation, stmt);
+ foundEndLFin = true;
+ }
+ }
+ assert(foundEndLFin);
+#endif // !FEATURE_EH_FUNCLETS
+}
+
+//------------------------------------------------------------------------
+// fgUpdateFinallyTargetFlags: recompute BBF_FINALLY_TARGET bits for all blocks
+// after finally optimizations have run.
+
+void Compiler::fgUpdateFinallyTargetFlags()
+{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Any fixup required?
+ if (!fgOptimizedFinally)
+ {
+ JITDUMP("In fgUpdateFinallyTargetFlags - no finally opts, no fixup required\n");
+ return;
+ }
+
+ JITDUMP("In fgUpdateFinallyTargetFlags, updating finally target flag bits\n");
+
+ // Walk all blocks, and reset the target bits.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_FINALLY_TARGET;
+ }
+
+ // Walk all blocks again, and set the target bits.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* const leave = block->bbNext;
+ BasicBlock* const continuation = leave->bbJumpDest;
+
+ if ((continuation->bbFlags & BBF_FINALLY_TARGET) == 0)
+ {
+ JITDUMP("Found callfinally BB%02u; setting finally target bit on BB%02u\n", block->bbNum,
+ continuation->bbNum);
+
+ continuation->bbFlags |= BBF_FINALLY_TARGET;
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+}
+
+// FatCalliTransformer transforms a calli that can use a fat function pointer.
+// A fat function pointer is a pointer with the second least significant bit set;
+// if the bit is set, the pointer (after clearing the bit) actually points to
+// a tuple <method pointer, instantiation argument pointer> where
+// instantiationArgument is a hidden first argument required by the method pointer.
+//
+// Fat pointers are used in CoreRT as a replacement for instantiating stubs,
+// because CoreRT can't generate stubs at runtime.
+//
+// The jit is responsible for checking the bit: if it is not set, do the regular call;
+// otherwise load the hidden argument, fix the pointer, and make a call with the
+// fixed pointer and the instantiation argument.
+//
+// before:
+// current block
+// {
+// previous statements
+// transforming statement
+// {
+// call with GTF_CALL_M_FAT_POINTER_CHECK flag set in function ptr
+// }
+// subsequent statements
+// }
+//
+// after:
+// current block
+// {
+// previous statements
+// } BBJ_NONE check block
+// check block
+// {
+// jump to else if function ptr has GTF_CALL_M_FAT_POINTER_CHECK set.
+// } BBJ_COND then block, else block
+// then block
+// {
+// original statement
+// } BBJ_ALWAYS remainder block
+// else block
+// {
+// unset GTF_CALL_M_FAT_POINTER_CHECK
+// load actual function pointer
+// load instantiation argument
+// create newArgList = (instantiation argument, original argList)
+// call (actual function pointer, newArgList)
+// } BBJ_NONE remainder block
+// remainder block
+// {
+// subsequent statements
+// }
+//
+class FatCalliTransformer
+{
+public:
+ FatCalliTransformer(Compiler* compiler) : compiler(compiler)
+ {
+ }
+
+ //------------------------------------------------------------------------
+ // Run: run transformation for each block.
+ //
+ void Run()
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ TransformBlock(block);
+ }
+ }
+
+private:
+ //------------------------------------------------------------------------
+ // TransformBlock: look through statements and transform statements with fat pointer calls.
+ //
+ void TransformBlock(BasicBlock* block)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ if (ContainsFatCalli(stmt))
+ {
+ StatementTransformer stmtTransformer(compiler, block, stmt);
+ stmtTransformer.Run();
+ }
+ }
+ }
+
+ //------------------------------------------------------------------------
+    // ContainsFatCalli: check whether this statement contains a fat pointer call.
+    //
+    // Checks the fat pointer candidate in the form call() or lclVar = call().
+    //
+    // Return Value:
+    //    true if it does, false otherwise.
+ //
+ bool ContainsFatCalli(GenTreeStmt* stmt)
+ {
+ GenTreePtr fatPointerCandidate = stmt->gtStmtExpr;
+ if (fatPointerCandidate->OperIsAssignment())
+ {
+ fatPointerCandidate = fatPointerCandidate->gtGetOp2();
+ }
+ return fatPointerCandidate->IsCall() && fatPointerCandidate->AsCall()->IsFatPointerCandidate();
+ }
+
+ class StatementTransformer
+ {
+ public:
+ StatementTransformer(Compiler* compiler, BasicBlock* block, GenTreeStmt* stmt)
+ : compiler(compiler), currBlock(block), stmt(stmt)
+ {
+ remainderBlock = nullptr;
+ checkBlock = nullptr;
+ thenBlock = nullptr;
+ elseBlock = nullptr;
+ doesReturnValue = stmt->gtStmtExpr->OperIsAssignment();
+ origCall = GetCall(stmt);
+ fptrAddress = origCall->gtCallAddr;
+ pointerType = fptrAddress->TypeGet();
+ }
+
+ //------------------------------------------------------------------------
+ // Run: transform the statement as described above.
+ //
+ void Run()
+ {
+ ClearFatFlag();
+ CreateRemainder();
+ CreateCheck();
+ CreateThen();
+ CreateElse();
+
+ RemoveOldStatement();
+ SetWeights();
+ ChainFlow();
+ }
+
+ private:
+ //------------------------------------------------------------------------
+ // GetCall: find a call in a statement.
+ //
+ // Arguments:
+ // callStmt - the statement with the call inside.
+ //
+ // Return Value:
+ // call tree node pointer.
+ GenTreeCall* GetCall(GenTreeStmt* callStmt)
+ {
+ GenTreePtr tree = callStmt->gtStmtExpr;
+ GenTreeCall* call = nullptr;
+ if (doesReturnValue)
+ {
+ assert(tree->OperIsAssignment());
+ call = tree->gtGetOp2()->AsCall();
+ }
+ else
+ {
+ call = tree->AsCall(); // call with void return type.
+ }
+ return call;
+ }
+
+ //------------------------------------------------------------------------
+ // ClearFatFlag: clear fat pointer candidate flag from the original call.
+ //
+ void ClearFatFlag()
+ {
+ origCall->ClearFatPointerCandidate();
+ }
+
+ //------------------------------------------------------------------------
+ // CreateRemainder: split current block at the fat call stmt and
+ // insert statements after the call into remainderBlock.
+ //
+ void CreateRemainder()
+ {
+ remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt);
+ unsigned propagateFlags = currBlock->bbFlags & BBF_GC_SAFE_POINT;
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+ }
+
+ //------------------------------------------------------------------------
+        // CreateCheck: create the check block, which tests whether the fat pointer bit is set.
+ //
+ void CreateCheck()
+ {
+ checkBlock = CreateAndInsertBasicBlock(BBJ_COND, currBlock);
+ GenTreePtr fatPointerMask = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK);
+ GenTreePtr fptrAddressCopy = compiler->gtCloneExpr(fptrAddress);
+ GenTreePtr fatPointerAnd = compiler->gtNewOperNode(GT_AND, TYP_I_IMPL, fptrAddressCopy, fatPointerMask);
+ GenTreePtr zero = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+ GenTreePtr fatPointerCmp = compiler->gtNewOperNode(GT_NE, TYP_INT, fatPointerAnd, zero);
+ GenTreePtr jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, fatPointerCmp);
+ GenTreePtr jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateThen: create the then block, which is executed if the call address is not a fat pointer.
+ //
+ void CreateThen()
+ {
+ thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock);
+ GenTreePtr nonFatCallStmt = compiler->gtCloneExpr(stmt)->AsStmt();
+ compiler->fgInsertStmtAtEnd(thenBlock, nonFatCallStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateElse: create the else block, which is executed if the call address is a fat pointer.
+ //
+ void CreateElse()
+ {
+ elseBlock = CreateAndInsertBasicBlock(BBJ_NONE, thenBlock);
+
+ GenTreePtr fixedFptrAddress = GetFixedFptrAddress();
+ GenTreePtr actualCallAddress = compiler->gtNewOperNode(GT_IND, pointerType, fixedFptrAddress);
+ GenTreePtr hiddenArgument = GetHiddenArgument(fixedFptrAddress);
+
+ GenTreeStmt* fatStmt = CreateFatCallStmt(actualCallAddress, hiddenArgument);
+ compiler->fgInsertStmtAtEnd(elseBlock, fatStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateAndInsertBasicBlock: ask the compiler to create a new basic block
+        // and insert it into the basic block list.
+ //
+ // Arguments:
+ // jumpKind - jump kind for the new basic block
+        //    insertAfter - the basic block after which the compiler has to insert the new one.
+ //
+ // Return Value:
+ // new basic block.
+ BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter)
+ {
+ BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
+ if ((insertAfter->bbFlags & BBF_INTERNAL) == 0)
+ {
+ block->bbFlags &= ~BBF_INTERNAL;
+ block->bbFlags |= BBF_IMPORTED;
+ }
+ return block;
+ }
+
+ //------------------------------------------------------------------------
+ // GetFixedFptrAddress: clear fat pointer bit from fat pointer address.
+ //
+ // Return Value:
+ // address without fat pointer bit set.
+ GenTreePtr GetFixedFptrAddress()
+ {
+ GenTreePtr fptrAddressCopy = compiler->gtCloneExpr(fptrAddress);
+ GenTreePtr fatPointerMask = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK);
+ return compiler->gtNewOperNode(GT_XOR, pointerType, fptrAddressCopy, fatPointerMask);
+ }
+
+ //------------------------------------------------------------------------
+ // GetHiddenArgument: load hidden argument.
+ //
+ // Arguments:
+ // fixedFptrAddress - pointer to the tuple <methodPointer, instantiationArgumentPointer>
+ //
+ // Return Value:
+ // loaded hidden argument.
+ GenTreePtr GetHiddenArgument(GenTreePtr fixedFptrAddress)
+ {
+ GenTreePtr fixedFptrAddressCopy = compiler->gtCloneExpr(fixedFptrAddress);
+ GenTreePtr wordSize = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, genTypeSize(TYP_I_IMPL));
+ GenTreePtr hiddenArgumentPtrPtr =
+ compiler->gtNewOperNode(GT_ADD, pointerType, fixedFptrAddressCopy, wordSize);
+ GenTreePtr hiddenArgumentPtr = compiler->gtNewOperNode(GT_IND, pointerType, hiddenArgumentPtrPtr);
+ return compiler->gtNewOperNode(GT_IND, fixedFptrAddressCopy->TypeGet(), hiddenArgumentPtr);
+ }
+
+ //------------------------------------------------------------------------
+ // CreateFatCallStmt: create call with fixed call address and hidden argument in the args list.
+ //
+ // Arguments:
+ // actualCallAddress - fixed call address
+ // hiddenArgument - loaded hidden argument
+ //
+ // Return Value:
+ // created call node.
+ GenTreeStmt* CreateFatCallStmt(GenTreePtr actualCallAddress, GenTreePtr hiddenArgument)
+ {
+ GenTreeStmt* fatStmt = compiler->gtCloneExpr(stmt)->AsStmt();
+ GenTreePtr fatTree = fatStmt->gtStmtExpr;
+ GenTreeCall* fatCall = GetCall(fatStmt);
+ fatCall->gtCallAddr = actualCallAddress;
+ GenTreeArgList* args = fatCall->gtCallArgs;
+ args = compiler->gtNewListNode(hiddenArgument, args);
+ fatCall->gtCallArgs = args;
+ return fatStmt;
+ }
+
+ //------------------------------------------------------------------------
+ // RemoveOldStatement: remove original stmt from current block.
+ //
+ void RemoveOldStatement()
+ {
+ compiler->fgRemoveStmt(currBlock, stmt);
+ }
+
+ //------------------------------------------------------------------------
+ // SetWeights: set weights for new blocks.
+ //
+ void SetWeights()
+ {
+ remainderBlock->inheritWeight(currBlock);
+ checkBlock->inheritWeight(currBlock);
+ thenBlock->inheritWeightPercentage(currBlock, HIGH_PROBABILITY);
+ elseBlock->inheritWeightPercentage(currBlock, 100 - HIGH_PROBABILITY);
+ }
+
+ //------------------------------------------------------------------------
+ // ChainFlow: link new blocks into correct cfg.
+ //
+ void ChainFlow()
+ {
+ assert(!compiler->fgComputePredsDone);
+ checkBlock->bbJumpDest = elseBlock;
+ thenBlock->bbJumpDest = remainderBlock;
+ }
+
+ Compiler* compiler;
+ BasicBlock* currBlock;
+ BasicBlock* remainderBlock;
+ BasicBlock* checkBlock;
+ BasicBlock* thenBlock;
+ BasicBlock* elseBlock;
+ GenTreeStmt* stmt;
+ GenTreeCall* origCall;
+ GenTreePtr fptrAddress;
+ var_types pointerType;
+ bool doesReturnValue;
+
+ const int FAT_POINTER_MASK = 0x2;
+ const int HIGH_PROBABILITY = 80;
+ };
+
+ Compiler* compiler;
+};
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgDebugCheckFatPointerCandidates: callback to make sure there are no more GTF_CALL_M_FAT_POINTER_CHECK calls.
+//
+Compiler::fgWalkResult Compiler::fgDebugCheckFatPointerCandidates(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->IsCall())
+ {
+ assert(!tree->AsCall()->IsFatPointerCandidate());
+ }
+ return WALK_CONTINUE;
+}
+
+//------------------------------------------------------------------------
+// CheckNoFatPointerCandidatesLeft: walk through blocks and check that there are no fat pointer candidates left.
+//
+void Compiler::CheckNoFatPointerCandidatesLeft()
+{
+ assert(!doesMethodHaveFatPointer());
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+        for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckFatPointerCandidates);
+ }
+ }
+}
+#endif
+
+//------------------------------------------------------------------------
+// fgTransformFatCalli: find and transform fat calls.
+//
+void Compiler::fgTransformFatCalli()
+{
+ assert(IsTargetAbi(CORINFO_CORERT_ABI));
+ FatCalliTransformer fatCalliTransformer(this);
+ fatCalliTransformer.Run();
+ clearMethodHasFatPointer();
+#ifdef DEBUG
+ CheckNoFatPointerCandidatesLeft();
+#endif
+}
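
For readers skimming the change, here is a rough C++ sketch (illustrative only, not part of the diff) of what the transformed calli does at run time. The function, type, and variable names are invented; the 0x2 mask and the double indirection for the hidden argument mirror FAT_POINTER_MASK and GetHiddenArgument above.

#include <stdint.h>

typedef int (*FatFn)(void* hiddenArg, int arg);

int CallPossiblyFatPointer(void* fptr, int arg)
{
    const uintptr_t FatPointerMask = 0x2;
    uintptr_t       addr           = reinterpret_cast<uintptr_t>(fptr);

    if ((addr & FatPointerMask) == 0)
    {
        // "then" block: a plain function pointer, call it directly.
        return reinterpret_cast<int (*)(int)>(addr)(arg);
    }

    // "else" block: clear the fat bit, then read the <method pointer, instantiation
    // argument pointer> tuple and load the hidden argument through it.
    uintptr_t fixed        = addr ^ FatPointerMask;
    void*     method       = *reinterpret_cast<void**>(fixed);
    void*     hiddenArgPtr = *reinterpret_cast<void**>(fixed + sizeof(void*));
    void*     hiddenArg    = *reinterpret_cast<void**>(hiddenArgPtr);
    return reinterpret_cast<FatFn>(method)(hiddenArg, arg);
}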
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index 128fc4a..dcca19e 100644
--- a/src/jit/gcencode.cpp
+++ b/src/jit/gcencode.cpp
@@ -3778,8 +3778,10 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
}
#endif // FEATURE_EH_FUNCLETS
+#if FEATURE_FIXED_OUT_ARGS
// outgoing stack area size
gcInfoEncoderWithLog->SetSizeOfStackOutgoingAndScratchArea(compiler->lvaOutgoingArgSpaceSize);
+#endif // FEATURE_FIXED_OUT_ARGS
#if DISPLAY_SIZES
@@ -3941,13 +3943,6 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// If we haven't continued to the next variable, we should report this as an untracked local.
CLANG_FORMAT_COMMENT_ANCHOR;
-#if DOUBLE_ALIGN
- // For genDoubleAlign(), locals are addressed relative to ESP and
- // arguments are addressed relative to EBP.
-
- if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
- offset += compiler->codeGen->genTotalFrameSize();
-#endif
GcSlotFlags flags = GC_SLOT_UNTRACKED;
if (varDsc->TypeGet() == TYP_BYREF)
@@ -3998,7 +3993,7 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// For genDoubleAlign(), locals are addressed relative to ESP and
// arguments are addressed relative to EBP.
- if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
offset += compiler->codeGen->genTotalFrameSize();
#endif
GcSlotFlags flags = GC_SLOT_UNTRACKED;
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
index b64fd0a..e2f76f3 100644
--- a/src/jit/gcinfo.cpp
+++ b/src/jit/gcinfo.cpp
@@ -265,6 +265,12 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTr
case GT_STOREIND:
#endif // !LEGACY_BACKEND
case GT_IND: /* Could be the managed heap */
+ if (tgt->TypeGet() == TYP_BYREF)
+ {
+ // Byref values cannot be in managed heap.
+ // This case occurs for Span<T>.
+ return WBF_NoBarrier;
+ }
return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1);
case GT_LEA:
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 4a6cc74..7af500f 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -678,7 +678,7 @@ Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData*
if (kind & GTK_SMPOP)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
if (tree->gtOp.gtOp1 != nullptr)
{
@@ -847,12 +847,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData*
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -1102,12 +1102,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -1301,7 +1301,7 @@ Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fg
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp2, fgWalkData);
if (result == WALK_ABORT)
@@ -1446,12 +1446,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fg
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -2378,8 +2378,8 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- return Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
- Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ return Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
(op1->gtBoundsChk.gtThrowKind == op2->gtBoundsChk.gtThrowKind);
case GT_STORE_DYN_BLK:
@@ -2447,7 +2447,7 @@ AGAIN:
if (kind & GTK_SMPOP)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
if (gtHasRef(tree->gtOp.gtOp1, lclNum, defOnly))
{
@@ -2604,11 +2604,11 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
+ if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
{
return true;
}
- if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
+ if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
{
return true;
}
@@ -2686,6 +2686,8 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTreePtr tree)
return desc.hasAddrTakenLcl;
}
+#ifdef DEBUG
+
/*****************************************************************************
*
* Helper used to compute hash values for trees.
@@ -2701,11 +2703,6 @@ inline unsigned genTreeHashAdd(unsigned old, void* add)
return genTreeHashAdd(old, (unsigned)(size_t)add);
}
-inline unsigned genTreeHashAdd(unsigned old, unsigned add1, unsigned add2)
-{
- return (old + old / 2) ^ add1 ^ add2;
-}
-
/*****************************************************************************
*
* Given an arbitrary expression tree, compute a hash value for it.
@@ -2900,18 +2897,6 @@ AGAIN:
unsigned hsh1 = gtHashValue(op1);
- /* Special case: addition of two values */
-
- if (GenTree::OperIsCommutative(oper))
- {
- unsigned hsh2 = gtHashValue(op2);
-
- /* Produce a hash that allows swapping the operands */
-
- hash = genTreeHashAdd(hash, hsh1, hsh2);
- goto DONE;
- }
-
/* Add op1's hash to the running value and continue with op2 */
hash = genTreeHashAdd(hash, hsh1);
@@ -3001,8 +2986,8 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtIndex));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
hash = genTreeHashAdd(hash, tree->gtBoundsChk.gtThrowKind);
break;
@@ -3027,6 +3012,8 @@ DONE:
return hash;
}
+#endif // DEBUG
+
/*****************************************************************************
*
* Given an arbitrary expression tree, attempts to find the set of all local variables
@@ -3194,7 +3181,7 @@ AGAIN:
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
/* It's a binary operator */
if (!lvaLclVarRefsAccum(tree->gtOp.gtOp1, findPtr, refsPtr, &allVars, &trkdVars))
@@ -3265,12 +3252,12 @@ AGAIN:
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
{
- if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
{
return false;
}
// Otherwise...
- if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
{
return false;
}
@@ -3375,6 +3362,10 @@ genTreeOps GenTree::ReverseRelop(genTreeOps relop)
GT_GT, // GT_LE
GT_LT, // GT_GE
GT_LE, // GT_GT
+#ifndef LEGACY_BACKEND
+ GT_TEST_NE, // GT_TEST_EQ
+ GT_TEST_EQ, // GT_TEST_NE
+#endif
};
assert(reverseOps[GT_EQ - GT_EQ] == GT_NE);
@@ -3385,6 +3376,11 @@ genTreeOps GenTree::ReverseRelop(genTreeOps relop)
assert(reverseOps[GT_GE - GT_EQ] == GT_LT);
assert(reverseOps[GT_GT - GT_EQ] == GT_LE);
+#ifndef LEGACY_BACKEND
+ assert(reverseOps[GT_TEST_EQ - GT_EQ] == GT_TEST_NE);
+ assert(reverseOps[GT_TEST_NE - GT_EQ] == GT_TEST_EQ);
+#endif
+
assert(OperIsCompare(relop));
assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(reverseOps));
@@ -3406,6 +3402,10 @@ genTreeOps GenTree::SwapRelop(genTreeOps relop)
GT_GE, // GT_LE
GT_LE, // GT_GE
GT_LT, // GT_GT
+#ifndef LEGACY_BACKEND
+ GT_TEST_EQ, // GT_TEST_EQ
+ GT_TEST_NE, // GT_TEST_NE
+#endif
};
assert(swapOps[GT_EQ - GT_EQ] == GT_EQ);
@@ -3416,6 +3416,11 @@ genTreeOps GenTree::SwapRelop(genTreeOps relop)
assert(swapOps[GT_GE - GT_EQ] == GT_LE);
assert(swapOps[GT_GT - GT_EQ] == GT_LT);
+#ifndef LEGACY_BACKEND
+ assert(swapOps[GT_TEST_EQ - GT_EQ] == GT_TEST_EQ);
+ assert(swapOps[GT_TEST_NE - GT_EQ] == GT_TEST_NE);
+#endif
+
assert(OperIsCompare(relop));
assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(swapOps));
@@ -4146,7 +4151,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
unsigned lvl2; // scratch variable
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
costEx = 0;
costSz = 0;
@@ -5622,17 +5627,17 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costEx = 4; // cmp reg,reg and jae throw (not taken)
costSz = 7; // jump to cold section
- level = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
- costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
- costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
+ level = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
+ costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
- lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
if (level < lvl2)
{
level = lvl2;
}
- costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
- costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
+ costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
+ costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
break;
@@ -5761,7 +5766,7 @@ void Compiler::gtComputeFPlvls(GenTreePtr tree)
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/* Check for some special cases */
@@ -5954,16 +5959,16 @@ void Compiler::gtComputeFPlvls(GenTreePtr tree)
break;
case GT_ARR_BOUNDS_CHECK:
- gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
gtComputeFPlvls(tree->gtBoundsChk.gtIndex);
+ gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
noway_assert(!isflt);
break;
-#ifdef DEBUG
default:
+#ifdef DEBUG
noway_assert(!"Unhandled special operator in gtComputeFPlvls()");
- break;
#endif
+ break;
}
DONE:
@@ -6134,14 +6139,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- if (this == parent->gtBoundsChk.gtArrLen)
- {
- return &(parent->gtBoundsChk.gtArrLen);
- }
if (this == parent->gtBoundsChk.gtIndex)
{
return &(parent->gtBoundsChk.gtIndex);
}
+ if (this == parent->gtBoundsChk.gtArrLen)
+ {
+ return &(parent->gtBoundsChk.gtArrLen);
+ }
if (this == parent->gtBoundsChk.gtIndRngFailBB)
{
return &(parent->gtBoundsChk.gtIndRngFailBB);
@@ -6787,6 +6792,57 @@ GenTreePtr Compiler::gtNewOneConNode(var_types type)
}
}
+#ifdef FEATURE_SIMD
+//---------------------------------------------------------------------
+// gtNewSIMDVectorZero: create a GT_SIMD node for Vector<T>.Zero
+//
+// Arguments:
+// simdType - simd vector type
+// baseType - element type of vector
+// size - size of vector in bytes
+GenTreePtr Compiler::gtNewSIMDVectorZero(var_types simdType, var_types baseType, unsigned size)
+{
+ baseType = genActualType(baseType);
+ GenTree* initVal = gtNewZeroConNode(baseType);
+ initVal->gtType = baseType;
+ return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+}
+
+//---------------------------------------------------------------------
+// gtNewSIMDVectorOne: create a GT_SIMD node for Vector<T>.One
+//
+// Arguments:
+// simdType - simd vector type
+// baseType - element type of vector
+// size - size of vector in bytes
+GenTreePtr Compiler::gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size)
+{
+ GenTree* initVal;
+ if (varTypeIsSmallInt(baseType))
+ {
+ unsigned baseSize = genTypeSize(baseType);
+ int val;
+ if (baseSize == 1)
+ {
+ val = 0x01010101;
+ }
+ else
+ {
+ val = 0x00010001;
+ }
+ initVal = gtNewIconNode(val);
+ }
+ else
+ {
+ initVal = gtNewOneConNode(baseType);
+ }
+
+ baseType = genActualType(baseType);
+ initVal->gtType = baseType;
+ return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+}
+#endif // FEATURE_SIMD
+
GenTreeCall* Compiler::gtNewIndCallNode(GenTreePtr addr, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
{
return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, args, ilOffset);
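
A small aside (not part of the diff) on the constants in gtNewSIMDVectorOne above: for byte and short element types the value 1 is pre-replicated across every lane that fits in a 32-bit immediate, so the SIMDIntrinsicInit broadcast starts from an already-splatted pattern. The helper below is hypothetical and only illustrates the bit layout.

#include <stdint.h>

// Hypothetical illustration of the literals used above:
//   1-byte lanes: 0x01010101 -> 01 01 01 01
//   2-byte lanes: 0x00010001 -> 0001 0001
uint32_t ReplicatedOne(unsigned elementSizeInBytes)
{
    return (elementSizeInBytes == 1) ? 0x01010101u : 0x00010001u;
}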
@@ -7525,9 +7581,7 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr
if (dst->OperIsLocal() && varTypeIsStruct(dst))
{
- unsigned lclNum = dst->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ setLclRelatedToSIMDIntrinsic(dst);
}
}
}
@@ -8031,7 +8085,7 @@ GenTreePtr Compiler::gtCloneExpr(
case GT_SIMD:
{
GenTreeSIMD* simdOp = tree->AsSIMD();
- copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2(),
+ copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2IfPresent(),
simdOp->gtSIMDIntrinsicID, simdOp->gtSIMDBaseType, simdOp->gtSIMDSize);
}
break;
@@ -8079,7 +8133,7 @@ GenTreePtr Compiler::gtCloneExpr(
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, deepVarNum, deepVarVal);
}
@@ -8130,7 +8184,7 @@ GenTreePtr Compiler::gtCloneExpr(
{
copy->gtFlags |= (copy->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
}
- if (copy->gtGetOp2() != nullptr)
+ if (copy->gtGetOp2IfPresent() != nullptr)
{
copy->gtFlags |= (copy->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
}
@@ -8290,8 +8344,8 @@ GenTreePtr Compiler::gtCloneExpr(
#endif // FEATURE_SIMD
copy = new (this, oper)
GenTreeBoundsChk(oper, tree->TypeGet(),
- gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal),
gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal),
tree->gtBoundsChk.gtThrowKind);
break;
@@ -9000,9 +9054,9 @@ GenTreePtr GenTree::GetChild(unsigned childNum)
switch (childNum)
{
case 0:
- return AsBoundsChk()->gtArrLen;
- case 1:
return AsBoundsChk()->gtIndex;
+ case 1:
+ return AsBoundsChk()->gtArrLen;
default:
unreached();
}
@@ -9176,9 +9230,9 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
switch (m_state)
{
case 0:
- return &m_node->AsBoundsChk()->gtArrLen;
- case 1:
return &m_node->AsBoundsChk()->gtIndex;
+ case 1:
+ return &m_node->AsBoundsChk()->gtArrLen;
default:
return nullptr;
}
@@ -10208,6 +10262,10 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
case GT_LE:
case GT_GE:
case GT_GT:
+#ifndef LEGACY_BACKEND
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
+#endif
if (tree->gtFlags & GTF_RELOP_NAN_UN)
{
printf("N");
@@ -10226,12 +10284,6 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
--msgLength;
break;
}
- if (tree->gtFlags & GTF_RELOP_SMALL)
- {
- printf("S");
- --msgLength;
- break;
- }
goto DASH;
default:
@@ -10694,7 +10746,17 @@ void Compiler::gtDispConst(GenTree* tree)
case GT_CNS_INT:
if (tree->IsIconHandle(GTF_ICON_STR_HDL))
{
- printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), eeGetCPString(tree->gtIntCon.gtIconVal));
+ const wchar_t* str = eeGetCPString(tree->gtIntCon.gtIconVal);
+ if (str != nullptr)
+ {
+ printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), str);
+ }
+ else
+ {
+                    // Note that eeGetCPString isn't currently implemented on Linux/ARM
+ // and instead always returns nullptr
+ printf(" 0x%X [ICON_STR_HDL]", dspPtr(tree->gtIntCon.gtIconVal));
+ }
}
else
{
@@ -11255,7 +11317,7 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
if (!topOnly)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
// Label the childMsgs of the GT_COLON operator
// op2 is the then part
@@ -11670,8 +11732,8 @@ void Compiler::gtDispTree(GenTreePtr tree,
printf("\n");
if (!topOnly)
{
- gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArc, nullptr, topOnly);
- gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArcBottom, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArcBottom, nullptr, topOnly);
}
break;
@@ -11955,6 +12017,10 @@ void Compiler::gtDispLIRNode(GenTree* node)
// 49 spaces for alignment
printf("%-49s", "");
+#ifdef FEATURE_SET_FLAGS
+ // additional flag enlarges the flag field by one character
+ printf(" ");
+#endif
indentStack.Push(operandArc);
indentStack.print();
@@ -12615,7 +12681,7 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
assert(kind & (GTK_UNOP | GTK_BINOP));
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
if (!opts.OptEnabled(CLFLG_CONSTANTFOLD))
{
@@ -14411,12 +14477,14 @@ GenTreePtr Compiler::gtBuildCommaList(GenTreePtr list, GenTreePtr expr)
result->gtFlags |= (list->gtFlags & GTF_ALL_EFFECT);
result->gtFlags |= (expr->gtFlags & GTF_ALL_EFFECT);
- // 'list' and 'expr' should have valuenumbers defined for both or for neither one
- noway_assert(list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined());
+ // 'list' and 'expr' should have valuenumbers defined for both or for neither one (unless we are remorphing,
+ // in which case a prior transform involving either node may have discarded or otherwise invalidated the value
+ // numbers).
+ assert((list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined()) || !fgGlobalMorph);
// Set the ValueNumber 'gtVNPair' for the new GT_COMMA node
//
- if (expr->gtVNPair.BothDefined())
+ if (list->gtVNPair.BothDefined() && expr->gtVNPair.BothDefined())
{
// The result of a GT_COMMA node is op2, the normal value number is op2vnp
// But we also need to include the union of side effects from op1 and op2.
@@ -14505,7 +14573,7 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = expr->gtOp.gtOp1;
- GenTreePtr op2 = expr->gtGetOp2();
+ GenTreePtr op2 = expr->gtGetOp2IfPresent();
if (flags & GTF_EXCEPT)
{
@@ -14589,8 +14657,8 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
#endif // FEATURE_SIMD
)
{
- gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
gtExtractSideEffList(expr->AsBoundsChk()->gtIndex, pList, flags);
+ gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
}
if (expr->OperGet() == GT_DYN_BLK || expr->OperGet() == GT_STORE_DYN_BLK)
@@ -15046,7 +15114,6 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
{
VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this));
- VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this));
@@ -15055,20 +15122,22 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
{
VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal());
- VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal());
}
- block->bbHeapUse = false;
- block->bbHeapDef = false;
- block->bbHeapLiveIn = false;
- block->bbHeapLiveOut = false;
+ block->bbMemoryUse = emptyMemoryKindSet;
+ block->bbMemoryDef = emptyMemoryKindSet;
+ block->bbMemoryLiveIn = emptyMemoryKindSet;
+ block->bbMemoryLiveOut = emptyMemoryKindSet;
- block->bbHeapSsaPhiFunc = nullptr;
- block->bbHeapSsaNumIn = 0;
- block->bbHeapSsaNumOut = 0;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ block->bbMemorySsaPhiFunc[memoryKind] = nullptr;
+ block->bbMemorySsaNumIn[memoryKind] = 0;
+ block->bbMemorySsaNumOut[memoryKind] = 0;
+ }
// Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index.
static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP);
@@ -15717,18 +15786,21 @@ unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
return lclNum;
}
-// return true if this tree node is a subcomponent of parent for codegen purposes
-// (essentially, will be rolled into the same instruction)
-// Note that this method relies upon the value of gtRegNum field to determine
-// if the treenode is contained or not. Therefore you can not call this method
-// until after the LSRA phase has allocated physical registers to the treenodes.
+//------------------------------------------------------------------------
+// isContained: check whether this tree node is a subcomponent of its parent for codegen purposes
+//
+// Return Value:
+// Returns true if there is no code generated explicitly for this node.
+// Essentially, it will be rolled into the code generation for the parent.
+//
+// Assumptions:
+// This method relies upon the value of gtRegNum field to determine whether the tree node
+// is contained.
+// Therefore you can not call this method until after the LSRA phase has allocated physical
+// registers to the treenodes.
+//
bool GenTree::isContained() const
{
- if (isContainedSpillTemp())
- {
- return true;
- }
-
if (gtHasReg())
{
return false;
@@ -15747,7 +15819,6 @@ bool GenTree::isContained() const
return false;
}
- // TODO-Cleanup : this is not clean, would be nice to have some way of marking this.
switch (OperGet())
{
case GT_STOREIND:
@@ -16253,8 +16324,15 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
case GT_ASG:
structHnd = gtGetStructHandleIfPresent(tree->gtGetOp1());
break;
- case GT_LCL_VAR:
case GT_LCL_FLD:
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(tree))
+ {
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+ }
+#endif
+ break;
+ case GT_LCL_VAR:
structHnd = lvaTable[tree->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
break;
case GT_RETURN:
@@ -16792,15 +16870,8 @@ bool FieldSeqNode::IsPseudoField()
GenTreeSIMD* Compiler::gtNewSIMDNode(
var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
{
- // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR))), in which case it should be
- // marked lvUsedInSIMDIntrinsic.
assert(op1 != nullptr);
- if (op1->OperGet() == GT_LCL_VAR)
- {
- unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
- }
+ SetOpLclRelatedToSIMDIntrinsic(op1);
return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
}
@@ -16808,24 +16879,34 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
GenTreeSIMD* Compiler::gtNewSIMDNode(
var_types type, GenTreePtr op1, GenTreePtr op2, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
{
- // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR))), in which case it should be
- // marked lvUsedInSIMDIntrinsic.
assert(op1 != nullptr);
- if (op1->OperIsLocal())
+ SetOpLclRelatedToSIMDIntrinsic(op1);
+ if (op2 != nullptr)
{
- unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ SetOpLclRelatedToSIMDIntrinsic(op2);
}
- if (op2 != nullptr && op2->OperIsLocal())
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+}
+
+//-------------------------------------------------------------------
+// SetOpLclRelatedToSIMDIntrinsic: Determine if the tree has a local var that needs to be set
+// as used by a SIMD intrinsic, and if so, set that local var appropriately.
+//
+// Arguments:
+// op - The tree, to be an operand of a new GT_SIMD node, to check.
+//
+void Compiler::SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op)
+{
+ if (op->OperIsLocal())
{
- unsigned lclNum = op2->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ setLclRelatedToSIMDIntrinsic(op);
+ }
+ else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) &&
+ op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1);
}
-
- return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
}
bool GenTree::isCommutativeSIMDIntrinsic()
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 4611d35..0ea8321 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -566,7 +566,7 @@ public:
bool isContainedIntOrIImmed() const
{
- return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
+ return isContained() && IsCnsIntOrI() && !isUsedFromSpillTemp();
}
bool isContainedFltOrDblImmed() const
@@ -579,28 +579,34 @@ public:
return OperGet() == GT_LCL_FLD || OperGet() == GT_STORE_LCL_FLD;
}
- bool isContainedLclField() const
+ bool isUsedFromSpillTemp() const;
+
+ // Indicates whether it is a memory op.
+ // Right now it includes Indir and LclField ops.
+ bool isMemoryOp() const
{
- return isContained() && isLclField();
+ return isIndir() || isLclField();
}
- bool isContainedLclVar() const
+ bool isUsedFromMemory() const
{
- return isContained() && (OperGet() == GT_LCL_VAR);
+ return ((isContained() && (isMemoryOp() || (OperGet() == GT_LCL_VAR) || (OperGet() == GT_CNS_DBL))) ||
+ isUsedFromSpillTemp());
}
- bool isContainedSpillTemp() const;
+ bool isLclVarUsedFromMemory() const
+ {
+ return (OperGet() == GT_LCL_VAR) && (isContained() || isUsedFromSpillTemp());
+ }
- // Indicates whether it is a memory op.
- // Right now it includes Indir and LclField ops.
- bool isMemoryOp() const
+ bool isLclFldUsedFromMemory() const
{
- return isIndir() || isLclField();
+ return isLclField() && (isContained() || isUsedFromSpillTemp());
}
- bool isContainedMemoryOp() const
+ bool isUsedFromReg() const
{
- return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp();
+ return !isContained() && !isUsedFromSpillTemp();
}
regNumber GetRegNum() const
@@ -903,8 +909,6 @@ public:
#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
-#define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
- // is the small type)
#define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
// with explicit "loop test" in the header block.
@@ -1073,6 +1077,17 @@ public:
}
}
+ bool OperIs(genTreeOps oper)
+ {
+ return OperGet() == oper;
+ }
+
+ template <typename... T>
+ bool OperIs(genTreeOps oper, T... rest)
+ {
+ return OperIs(oper) || OperIs(rest...);
+ }
+
static bool OperIsConst(genTreeOps gtOper)
{
return (OperKind(gtOper) & GTK_CONST) != 0;
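
A brief usage sketch (not part of the diff) for the variadic OperIs helper added above: the template expands to a chain of OperGet() comparisons, so the hypothetical call below is equivalent to three explicit equality checks.

// Hypothetical caller code:
if (tree->OperIs(GT_ADD, GT_SUB, GT_MUL))
{
    // Same as: OperGet() == GT_ADD || OperGet() == GT_SUB || OperGet() == GT_MUL
}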
@@ -1588,8 +1603,14 @@ public:
inline GenTreePtr gtGetOp1();
+ // Directly return op2. Asserts the node is binary. Might return nullptr if the binary node allows
+ // a nullptr op2, such as GT_LIST. This is more efficient than gtGetOp2IfPresent() if you know what
+ // node type you have.
inline GenTreePtr gtGetOp2();
+ // The returned pointer might be nullptr if the node is not binary, or if non-null op2 is not required.
+ inline GenTreePtr gtGetOp2IfPresent();
+
// Given a tree node, if this is a child of that node, return the pointer to the child node so that it
// can be modified; otherwise, return null.
GenTreePtr* gtGetChildPointer(GenTreePtr parent);
@@ -3248,43 +3269,52 @@ struct GenTreeCall final : public GenTree
#endif
}
-#define GTF_CALL_M_EXPLICIT_TAILCALL \
- 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
-#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
-#define GTF_CALL_M_VARARGS 0x0004 // GT_CALL -- the call uses varargs ABI
-#define GTF_CALL_M_RETBUFFARG 0x0008 // GT_CALL -- first parameter is the return buffer argument
-#define GTF_CALL_M_DELEGATE_INV 0x0010 // GT_CALL -- call to Delegate.Invoke
-#define GTF_CALL_M_NOGCCHECK 0x0020 // GT_CALL -- not a call for computing full interruptability
-#define GTF_CALL_M_SPECIAL_INTRINSIC 0x0040 // GT_CALL -- function that could be optimized as an intrinsic
- // in special cases. Used to optimize fast way out in morphing
-#define GTF_CALL_M_UNMGD_THISCALL \
- 0x0080 // "this" pointer (first argument) should be enregistered (only for GTF_CALL_UNMANAGED)
-#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT \
- 0x0080 // the virtstub is indirected through a relative address (only for GTF_CALL_VIRT_STUB)
-#define GTF_CALL_M_NONVIRT_SAME_THIS \
- 0x0080 // callee "this" pointer is equal to caller this pointer (only for GTF_CALL_NONVIRT)
-#define GTF_CALL_M_FRAME_VAR_DEATH 0x0100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
+// clang-format off
+
+#define GTF_CALL_M_EXPLICIT_TAILCALL 0x00000001 // GT_CALL -- the call is "tail" prefixed and
+ // importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL 0x00000002 // GT_CALL -- the call is a tailcall
+#define GTF_CALL_M_VARARGS 0x00000004 // GT_CALL -- the call uses varargs ABI
+#define GTF_CALL_M_RETBUFFARG 0x00000008 // GT_CALL -- first parameter is the return buffer argument
+#define GTF_CALL_M_DELEGATE_INV 0x00000010 // GT_CALL -- call to Delegate.Invoke
+#define GTF_CALL_M_NOGCCHECK 0x00000020 // GT_CALL -- not a call for computing full interruptability
+#define GTF_CALL_M_SPECIAL_INTRINSIC 0x00000040 // GT_CALL -- function that could be optimized as an intrinsic
+ // in special cases. Used to optimize fast way out in morphing
+#define GTF_CALL_M_UNMGD_THISCALL 0x00000080 // GT_CALL -- "this" pointer (first argument)
+ // should be enregistered (only for GTF_CALL_UNMANAGED)
+#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT 0x00000080 // the virtstub is indirected through
+ // a relative address (only for GTF_CALL_VIRT_STUB)
+#define GTF_CALL_M_NONVIRT_SAME_THIS 0x00000080 // GT_CALL -- callee "this" pointer is
+ // equal to caller this pointer (only for GTF_CALL_NONVIRT)
+#define GTF_CALL_M_FRAME_VAR_DEATH 0x00000100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
#ifndef LEGACY_BACKEND
-#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x0200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
-#endif // !LEGACY_BACKEND
+#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x00000200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
+#endif
#if FEATURE_TAILCALL_OPT
-#define GTF_CALL_M_IMPLICIT_TAILCALL \
- 0x0400 // GT_CALL -- call is an opportunistic tail call and importer has performed tail call checks
-#define GTF_CALL_M_TAILCALL_TO_LOOP \
- 0x0800 // GT_CALL -- call is a fast recursive tail call that can be converted into a loop
+#define GTF_CALL_M_IMPLICIT_TAILCALL 0x00000400 // GT_CALL -- call is an opportunistic
+ // tail call and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL_TO_LOOP 0x00000800 // GT_CALL -- call is a fast recursive tail call
+ // that can be converted into a loop
#endif
-#define GTF_CALL_M_PINVOKE 0x1000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
- // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
- // an IL Stub dynamically generated for a PInvoke declaration is flagged as
- // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
- // know when these flags are set.
+#define GTF_CALL_M_PINVOKE 0x00001000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
+ // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
+ // an IL Stub dynamically generated for a PInvoke declaration is flagged as
+ // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
+ // know when these flags are set.
+
+#define GTF_CALL_M_R2R_REL_INDIRECT 0x00002000 // GT_CALL -- ready to run call is indirected through a relative address
+#define GTF_CALL_M_DOES_NOT_RETURN 0x00004000 // GT_CALL -- call does not return
+#define GTF_CALL_M_SECURE_DELEGATE_INV 0x00008000 // GT_CALL -- call is in secure delegate
+#define GTF_CALL_M_FAT_POINTER_CHECK 0x00010000 // GT_CALL -- CoreRT managed calli needs transformation, that checks
+ // special bit in calli address. If it is set, then it is necessary
+ // to restore real function address and load hidden argument
+ // as the first argument for calli. It is CoreRT replacement for instantiating
+ // stubs, because executable code cannot be generated at runtime.
-#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address
-#define GTF_CALL_M_DOES_NOT_RETURN 0x4000 // GT_CALL -- call does not return
-#define GTF_CALL_M_SECURE_DELEGATE_INV 0x8000 // GT_CALL -- call is in secure delegate
+ // clang-format on
bool IsUnmanaged() const
{
@@ -3482,9 +3512,24 @@ struct GenTreeCall final : public GenTree
return (gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0;
}
+ bool IsFatPointerCandidate() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_FAT_POINTER_CHECK) != 0;
+ }
+
bool IsPure(Compiler* compiler) const;
- unsigned short gtCallMoreFlags; // in addition to gtFlags
+ void ClearFatPointerCandidate()
+ {
+ gtCallMoreFlags &= ~GTF_CALL_M_FAT_POINTER_CHECK;
+ }
+
+ void SetFatPointerCandidate()
+ {
+ gtCallMoreFlags |= GTF_CALL_M_FAT_POINTER_CHECK;
+ }
+
+ unsigned gtCallMoreFlags; // in addition to gtFlags
unsigned char gtCallType : 3; // value from the gtCallTypes enumeration
unsigned char gtReturnType : 5; // exact return type
@@ -3764,8 +3809,8 @@ public:
struct GenTreeBoundsChk : public GenTree
{
- GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
GenTreePtr gtIndex; // The index expression.
+ GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
GenTreePtr gtIndRngFailBB; // Label to jump to for array-index-out-of-range
SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure
@@ -3775,10 +3820,10 @@ struct GenTreeBoundsChk : public GenTree
optimizer has a chance of eliminating some of the rng checks */
unsigned gtStkDepth;
- GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr arrLen, GenTreePtr index, SpecialCodeKind kind)
+ GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr index, GenTreePtr arrLen, SpecialCodeKind kind)
: GenTree(oper, type)
- , gtArrLen(arrLen)
, gtIndex(index)
+ , gtArrLen(arrLen)
, gtIndRngFailBB(nullptr)
, gtThrowKind(kind)
, gtStkDepth(0)
@@ -4531,6 +4576,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
struct GenTreePutArgStk : public GenTreeUnOp
{
unsigned gtSlotNum; // Slot number of the argument to be passed on stack
+#if defined(UNIX_X86_ABI)
+ unsigned gtPadAlign; // Number of padding slots for stack alignment
+#endif
#if FEATURE_FASTTAILCALL
bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
@@ -4546,6 +4594,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4567,6 +4618,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4588,6 +4642,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4607,6 +4664,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4625,6 +4685,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}
+#if defined(UNIX_X86_ABI)
+ unsigned getArgPadding()
+ {
+ return gtPadAlign;
+ }
+
+ void setArgPadding(unsigned padAlign)
+ {
+ gtPadAlign = padAlign;
+ }
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
@@ -4968,7 +5040,7 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal)
if ((gtOper == GT_SIMD) && (gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInit) && gtGetOp1()->IsIntegralConst(constVal))
{
assert(varTypeIsIntegral(gtSIMD.gtSIMDBaseType));
- assert(gtGetOp2() == nullptr);
+ assert(gtGetOp2IfPresent() == nullptr);
return true;
}
#endif
@@ -5149,12 +5221,24 @@ inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
inline GenTreePtr GenTree::gtGetOp2()
{
+ assert(OperIsBinary());
+
+ GenTreePtr op2 = gtOp.gtOp2;
+
+ // Only allow null op2 if the node type allows it, e.g. GT_LIST.
+ assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
+
+ return op2;
+}
+
+inline GenTreePtr GenTree::gtGetOp2IfPresent()
+{
/* gtOp.gtOp2 is only valid for GTK_BINOP nodes. */
GenTreePtr op2 = OperIsBinary() ? gtOp.gtOp2 : nullptr;
// This documents the genTreeOps for which gtOp.gtOp2 cannot be nullptr.
- // This helps prefix in its analyis of code which calls gtGetOp2()
+ // This helps prefix in its analysis of code which calls gtGetOp2()
assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
@@ -5319,10 +5403,10 @@ inline bool GenTreeBlk::HasGCPtr()
return false;
}
-inline bool GenTree::isContainedSpillTemp() const
+inline bool GenTree::isUsedFromSpillTemp() const
{
#if !defined(LEGACY_BACKEND)
- // If spilled and no reg at use, then it is treated as contained.
+ // If spilled and no reg at use, then it is used from the spill temp location rather than being reloaded.
if (((gtFlags & GTF_SPILLED) != 0) && ((gtFlags & GTF_NOREG_AT_USE) != 0))
{
return true;
diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp
index 9255d8f..e4f1c25 100644
--- a/src/jit/gschecks.cpp
+++ b/src/jit/gschecks.cpp
@@ -409,7 +409,8 @@ void Compiler::gsParamsToShadows()
lvaTable[shadowVar].lvUsedInSIMDIntrinsic = varDsc->lvUsedInSIMDIntrinsic;
if (varDsc->lvSIMDType)
{
- lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
+ lvaTable[shadowVar].lvExactSize = varDsc->lvExactSize;
+ lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
}
#endif
lvaTable[shadowVar].lvRegStruct = varDsc->lvRegStruct;
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 92265a7..2d9255b 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -145,6 +145,17 @@ GTNODE(LT , "<" ,GenTreeOp ,0,GTK_BINOP|GTK_RE
GTNODE(LE , "<=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
GTNODE(GE , ">=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
GTNODE(GT , ">" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+#ifndef LEGACY_BACKEND
+// These are similar to GT_EQ/GT_NE but they generate "test" instead of "cmp" instructions.
+// Currently these are generated during lowering for code like ((x & y) eq|ne 0) only on
+// XArch, but ARM could also use them for the same purpose since it has a "tst" instruction.
+// Note that the general case of comparing a register against 0 is handled directly by
+// codegen, which emits a "test reg, reg" instruction; doing that during lowering would be
+// more difficult because the source operand is used twice, so it would have to be a lclvar.
+// Because of this there is no need to also add GT_TEST_LT/LE/GE/GT opers.
+GTNODE(TEST_EQ , "testEQ" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(TEST_NE , "testNE" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+#endif
GTNODE(COMMA , "comma" ,GenTreeOp ,0,GTK_BINOP|GTK_NOTLIR)
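
A small standalone example of the source pattern these new opers target. The xarch instructions shown in the comment are an illustration of why "test" is preferable for this shape, not the exact output of the JIT:

    #include <cstdio>

    // For (x & y) == 0 a single "test" suffices, because "test" sets the flags
    // from x & y without materializing the AND result, e.g. roughly:
    //     test edi, esi
    //     sete al
    // instead of an explicit "and" followed by a "cmp" against zero.
    bool MaskIsClear(unsigned x, unsigned y)
    {
        return (x & y) == 0;
    }

    int main()
    {
        std::printf("%d\n", MaskIsClear(0xA, 0x5)); // prints 1
        return 0;
    }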
@@ -269,7 +280,7 @@ GTNODE(EMITNOP , "emitnop" ,GenTree ,0,GTK_LEAF|GTK_NOV
GTNODE(PINVOKE_PROLOG ,"pinvoke_prolog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
GTNODE(PINVOKE_EPILOG ,"pinvoke_epilog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
GTNODE(PUTARG_REG , "putarg_reg" ,GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register
-GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP) // operator that places outgoing arg in stack
+GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack
GTNODE(RETURNTRAP , "returnTrap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
GTNODE(SWAP , "swap" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
GTNODE(IL_OFFSET , "il_offset" ,GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index cb09ff8..b1e0f48 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1489,17 +1489,16 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
const DWORD structFlags = info.compCompHnd->getClassAttribs(structHnd);
var_types structType = TYP_STRUCT;
-#ifdef FEATURE_CORECLR
- const bool hasGCPtrs = (structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0;
-#else
- // Desktop CLR won't report FLG_CONTAINS_GC_PTR for RefAnyClass - need to check explicitly.
- const bool isRefAny = (structHnd == impGetRefAnyClass());
- const bool hasGCPtrs = isRefAny || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0);
-#endif
+ // On CoreCLR the check for GC pointers includes a "may" to account for the special
+ // ByRef-like structs such as Span<T>. The additional "CONTAINS_STACK_PTR" check is the
+ // relevant bit: when it is set, the struct contains a ByRef that could be either a GC
+ // pointer or a native pointer.
+ const bool mayContainGCPtrs =
+ ((structFlags & CORINFO_FLG_CONTAINS_STACK_PTR) != 0 || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
#ifdef FEATURE_SIMD
// Check to see if this is a SIMD type.
- if (featureSIMD && !hasGCPtrs)
+ if (featureSIMD && !mayContainGCPtrs)
{
unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
@@ -1515,10 +1514,8 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
{
*pSimdBaseType = simdBaseType;
}
-#ifdef _TARGET_AMD64_
- // Amd64: also indicate that we use floating point registers
+ // Also indicate that we use floating point registers.
compFloatingPointUsed = true;
-#endif
}
}
}
@@ -1532,9 +1529,10 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
// Verify that the quick test up above via the class attributes gave a
// safe view of the type's GCness.
//
- // Note there are cases where hasGCPtrs is true but getClassGClayout
+ // Note there are cases where mayContainGCPtrs is true but getClassGClayout
// does not report any gc fields.
- assert(hasGCPtrs || (numGCVars == 0));
+
+ assert(mayContainGCPtrs || (numGCVars == 0));
if (pNumGCVars != nullptr)
{
@@ -1638,21 +1636,52 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
case GT_COMMA:
{
- // The second thing is the block node.
+ // The second thing could be a block node, a GT_SIMD node, or a GT_COMMA node.
GenTree* blockNode = structVal->gtOp.gtOp2;
assert(blockNode->gtType == structType);
- // It had better be a block node - any others should not occur here.
- assert(blockNode->OperIsBlk());
-
- // Sink the GT_COMMA below the blockNode addr.
- GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
- assert(blockNodeAddr->gtType == TYP_BYREF);
- GenTree* commaNode = structVal;
- commaNode->gtType = TYP_BYREF;
- commaNode->gtOp.gtOp2 = blockNodeAddr;
- blockNode->gtOp.gtOp1 = commaNode;
- structVal = blockNode;
- alreadyNormalized = true;
+
+ // Is this GT_COMMA(op1, GT_COMMA())?
+ GenTree* parent = structVal;
+ if (blockNode->OperGet() == GT_COMMA)
+ {
+ // Find the last node in the comma chain.
+ do
+ {
+ assert(blockNode->gtType == structType);
+ parent = blockNode;
+ blockNode = blockNode->gtOp.gtOp2;
+ } while (blockNode->OperGet() == GT_COMMA);
+ }
+
+#ifdef FEATURE_SIMD
+ if (blockNode->OperGet() == GT_SIMD)
+ {
+ parent->gtOp.gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
+ alreadyNormalized = true;
+ }
+ else
+#endif
+ {
+ assert(blockNode->OperIsBlk());
+
+ // Sink the GT_COMMA below the blockNode addr.
+ // That is, GT_COMMA(op1, op2=blockNode) is transformed into
+ // blockNode(GT_COMMA(TYP_BYREF, op1, op2's op1)).
+ //
+ // In case of a chained GT_COMMA case, we sink the last
+ // GT_COMMA below the blockNode addr.
+ GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
+ assert(blockNodeAddr->gtType == TYP_BYREF);
+ GenTree* commaNode = parent;
+ commaNode->gtType = TYP_BYREF;
+ commaNode->gtOp.gtOp2 = blockNodeAddr;
+ blockNode->gtOp.gtOp1 = commaNode;
+ if (parent == structVal)
+ {
+ structVal = blockNode;
+ }
+ alreadyNormalized = true;
+ }
}
break;
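
The comment in the hunk above describes the rewiring: GT_COMMA(op1, BLK(addr)) becomes BLK(GT_COMMA(op1, addr)), with the comma retyped to TYP_BYREF. A minimal standalone sketch of that pointer rewiring, using a toy Node type (not the JIT's GenTree) and ignoring type updates and the chained-comma case:

    #include <cassert>
    #include <cstdio>

    // Toy two-operand tree node.
    struct Node
    {
        const char* name;
        Node*       op1;
        Node*       op2;
    };

    // Sink the comma below the block node's address operand:
    //   COMMA(sideEffect, BLK(addr))  ==>  BLK(COMMA(sideEffect, addr))
    Node* SinkCommaBelowBlock(Node* comma)
    {
        Node* block = comma->op2;
        Node* addr  = block->op1;
        comma->op2  = addr;  // the comma now produces the address
        block->op1  = comma; // the block node consumes the comma
        return block;        // the block node becomes the new root
    }

    int main()
    {
        Node addr{"addr", nullptr, nullptr};
        Node block{"BLK", &addr, nullptr};
        Node side{"sideEffect", nullptr, nullptr};
        Node comma{"COMMA", &side, &block};

        Node* root = SinkCommaBelowBlock(&comma);
        assert((root == &block) && (block.op1 == &comma) && (comma.op2 == &addr));
        std::printf("new root: %s(%s(%s, %s))\n", root->name, comma.name, side.name, addr.name);
        return 0;
    }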
@@ -3240,7 +3269,8 @@ GenTreePtr Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
// Returns the GenTree that should be used to do the intrinsic instead of the call.
// Returns NULL if an intrinsic cannot be used
-GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
int memberRef,
@@ -3252,7 +3282,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
#if COR_JIT_EE_VERSION > 460
CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
#else
- CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
#endif
*pIntrinsicID = intrinsicID;
@@ -3576,7 +3606,33 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
retNode = op1;
break;
#endif
-
+ // Implement the ByReference constructor. This wraps the assignment of the incoming byref
+ // into the byref-like field of a value type; the canonical example is Span<T>. In effect
+ // this is just a substitution: the byref parameter is assigned into the newly allocated object.
+ case CORINFO_INTRINSIC_ByReference_Ctor:
+ {
+ // Remove call to constructor and directly assign the byref passed
+ // to the call to the first slot of the ByReference struct.
+ op1 = impPopStack().val;
+ GenTreePtr thisptr = newobjThis;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ GenTreePtr field = gtNewFieldRef(TYP_BYREF, fldHnd, thisptr, 0, false);
+ GenTreePtr assign = gtNewAssignNode(field, op1);
+ GenTreePtr byReferenceStruct = gtCloneExpr(thisptr->gtGetOp1());
+ assert(byReferenceStruct != nullptr);
+ impPushOnStack(byReferenceStruct, typeInfo(TI_STRUCT, clsHnd));
+ retNode = assign;
+ break;
+ }
+ // Implement ptr value getter for ByReference struct.
+ case CORINFO_INTRINSIC_ByReference_Value:
+ {
+ op1 = impPopStack().val;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ GenTreePtr field = gtNewFieldRef(TYP_BYREF, fldHnd, op1, 0, false);
+ retNode = field;
+ break;
+ }
default:
/* Unknown intrinsic */
break;
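
Both intrinsics above reduce to plain field accesses on the single byref field of the struct. A rough standalone analogue of the effect, using an ordinary pointer in place of a managed byref (which C++ cannot express) and hypothetical helper names:

    #include <cstdio>

    // Stand-in for the single-field ByReference-like struct.
    template <typename T>
    struct ByRefLike
    {
        T* ptr;
    };

    // What CORINFO_INTRINSIC_ByReference_Ctor reduces to:
    // store the byref into the first (and only) field.
    template <typename T>
    void CtorSubstitute(ByRefLike<T>* self, T* value)
    {
        self->ptr = value;
    }

    // What CORINFO_INTRINSIC_ByReference_Value reduces to: load that field.
    template <typename T>
    T* ValueSubstitute(ByRefLike<T>* self)
    {
        return self->ptr;
    }

    int main()
    {
        int            x = 42;
        ByRefLike<int> r{};
        CtorSubstitute(&r, &x);
        std::printf("%d\n", *ValueSubstitute(&r)); // prints 42
        return 0;
    }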
@@ -5359,29 +5415,23 @@ GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr,
}
//------------------------------------------------------------------------
-// impCanPInvokeInline: examine information from a call to see if the call
-// qualifies as an inline pinvoke.
-//
-// Arguments:
-// block - block contaning the call, or for inlinees, block
-// containing the call being inlined
+// impCanPInvokeInline: check whether PInvoke inlining should be enabled in the current method.
//
// Return Value:
-// true if this call qualifies as an inline pinvoke, false otherwise
+// true if PInvoke inlining should be enabled in the current method, false otherwise
//
// Notes:
-// Checks basic legality and then a number of ambient conditions
-// where we could pinvoke but choose not to
+// Checks a number of ambient conditions where we could pinvoke but choose not to
-bool Compiler::impCanPInvokeInline(BasicBlock* block)
+bool Compiler::impCanPInvokeInline()
{
- return impCanPInvokeInlineCallSite(block) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
- (compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
+ return getInlinePInvokeEnabled() && (!opts.compDbgCode) && (compCodeOpt() != SMALL_CODE) &&
+ (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
;
}
//------------------------------------------------------------------------
-// impCanPInvokeInlineSallSite: basic legality checks using information
+// impCanPInvokeInlineCallSite: basic legality checks using information
// from a call to see if the call qualifies as an inline pinvoke.
//
// Arguments:
@@ -5410,6 +5460,17 @@ bool Compiler::impCanPInvokeInline(BasicBlock* block)
bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
{
+ if (block->hasHndIndex())
+ {
+ return false;
+ }
+
+ // The remaining limitations do not apply to CoreRT
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ return true;
+ }
+
#ifdef _TARGET_AMD64_
// On x64, we disable pinvoke inlining inside of try regions.
// Here is the comment from JIT64 explaining why:
@@ -5431,12 +5492,13 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
//
// A desktop test case where this seems to matter is
// jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe
- const bool inX64Try = block->hasTryIndex();
-#else
- const bool inX64Try = false;
+ if (block->hasTryIndex())
+ {
+ return false;
+ }
#endif // _TARGET_AMD64_
- return !inX64Try && !block->hasHndIndex();
+ return true;
}
//------------------------------------------------------------------------
@@ -5502,27 +5564,38 @@ void Compiler::impCheckForPInvokeCall(
}
optNativeCallCount++;
- if (opts.compMustInlinePInvokeCalli && methHnd == nullptr)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && methHnd == nullptr)
{
- // Always inline pinvoke.
+ // PInvoke CALLI in IL stubs must be inlined
}
else
{
- // Check legality and profitability.
- if (!impCanPInvokeInline(block))
+ // Check legality
+ if (!impCanPInvokeInlineCallSite(block))
{
return;
}
- if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
+ // PInvoke CALL in IL stubs must be inlined on CoreRT. Skip the ambient conditions checks and
+ // profitability checks
+ if (!(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && IsTargetAbi(CORINFO_CORERT_ABI)))
{
- return;
+ if (!impCanPInvokeInline())
+ {
+ return;
+ }
+
+ // Size-speed tradeoff: don't use inline pinvoke at rarely
+ // executed call sites. The non-inline version is more
+ // compact.
+ if (block->isRunRarely())
+ {
+ return;
+ }
}
- // Size-speed tradeoff: don't use inline pinvoke at rarely
- // executed call sites. The non-inline version is more
- // compact.
- if (block->isRunRarely())
+ // The expensive check should be last
+ if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
{
return;
}
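
The restructured impCheckForPInvokeCall orders its checks from cheap to expensive: per-call-site legality first, then (outside IL stubs on CoreRT) the ambient-conditions and run-rarely checks, and the EE's marshaling query last. A hedged standalone sketch of that ordering with placeholder predicates; none of these function names are the JIT's:

    #include <cstdio>

    // Placeholder predicates standing in for the call-site legality check,
    // the ambient-conditions check, the run-rarely check, and the EE's
    // marshaling query, respectively.
    bool CallSiteIsLegal()        { return true;  }
    bool AmbientConditionsAllow() { return true;  }
    bool CallSiteRunsRarely()     { return false; }
    bool MarshalingRequired()     { return false; } // expensive EE call - keep it last

    bool ShouldInlinePInvoke(bool isIlStubOnCoreRT)
    {
        if (!CallSiteIsLegal())
        {
            return false;
        }
        if (!isIlStubOnCoreRT)
        {
            // Cheap profitability checks; skipped for IL stubs on CoreRT,
            // where the call must be inlined.
            if (!AmbientConditionsAllow() || CallSiteRunsRarely())
            {
                return false;
            }
        }
        // The expensive check runs only when everything else already allows inlining.
        return !MarshalingRequired();
    }

    int main()
    {
        std::printf("%d\n", ShouldInlinePInvoke(false));
        return 0;
    }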
@@ -6189,7 +6262,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly
// one pop seen so far.
#else
- nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
#endif
if (isCallPopAndRet)
@@ -6359,6 +6432,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &calliSig);
callRetTyp = JITtype2varType(calliSig.retType);
+ clsHnd = calliSig.retTypeClass;
call = impImportIndirectCall(&calliSig, ilOffset);
@@ -6387,6 +6461,16 @@ var_types Compiler::impImportCall(OPCODE opcode,
call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
*call->gtCall.callSig = calliSig;
#endif // DEBUG
+
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ bool managedCall = (calliSig.callConv & GTF_CALL_UNMANAGED) == 0;
+ if (managedCall)
+ {
+ call->AsCall()->SetFatPointerCandidate();
+ setMethodHasFatPointer();
+ }
+ }
}
else // (opcode != CEE_CALLI)
{
@@ -6435,7 +6519,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
if (mflags & CORINFO_FLG_DONT_INLINE_CALLER)
{
- compInlineResult->NoteFatal(InlineObservation::CALLEE_STACK_CRAWL_MARK);
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NOINLINE_CALLEE);
return callRetTyp;
}
@@ -6490,7 +6574,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
// <NICE> Factor this into getCallInfo </NICE>
if ((mflags & CORINFO_FLG_INTRINSIC) && !pConstrainedResolvedToken)
{
- call = impIntrinsic(clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
+ call = impIntrinsic(newobjThis, clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
(canTailCall && (tailCall != 0)), &intrinsicID);
if (call != nullptr)
@@ -6533,7 +6617,6 @@ var_types Compiler::impImportCall(OPCODE opcode,
if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT))
{
NO_WAY("Virtual call to a function added via EnC is not supported");
- goto DONE_CALL;
}
if ((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_DEFAULT &&
@@ -7469,10 +7552,8 @@ DONE:
}
}
-// Note: we assume that small return types are already normalized by the managed callee
-// or by the pinvoke stub for calls to unmanaged code.
-
-DONE_CALL:
+ // Note: we assume that small return types are already normalized by the managed callee
+ // or by the pinvoke stub for calls to unmanaged code.
if (!bIntrinsicImported)
{
@@ -7517,6 +7598,7 @@ DONE_CALL:
impMarkInlineCandidate(call, exactContextHnd, callInfo);
}
+DONE_CALL:
// Push or append the result of the call
if (callRetTyp == TYP_VOID)
{
@@ -7569,9 +7651,11 @@ DONE_CALL:
}
}
- if (call->gtOper == GT_CALL)
+ if (call->IsCall())
{
// Sometimes "call" is not a GT_CALL (if we imported an intrinsic that didn't turn into a call)
+
+ bool fatPointerCandidate = call->AsCall()->IsFatPointerCandidate();
if (varTypeIsStruct(callRetTyp))
{
call = impFixupCallStructReturn(call, sig->retTypeClass);
@@ -7580,6 +7664,7 @@ DONE_CALL:
if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
{
assert(opts.OptEnabled(CLFLG_INLINING));
+ assert(!fatPointerCandidate); // We should not try to inline calli.
// Make the call its own tree (spill the stack if needed).
impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
@@ -7589,6 +7674,24 @@ DONE_CALL:
}
else
{
+ if (fatPointerCandidate)
+ {
+ // fatPointer candidates should be in statements of the form call() or var = call().
+ // This form makes it possible to find statements with fat calls without walking whole
+ // trees, and avoids the problems that come with cutting trees.
+ assert(!bIntrinsicImported);
+ assert(IsTargetAbi(CORINFO_CORERT_ABI));
+ if (call->OperGet() != GT_LCL_VAR) // can be already converted by impFixupCallStructReturn.
+ {
+ unsigned calliSlot = lvaGrabTemp(true DEBUGARG("calli"));
+ LclVarDsc* varDsc = &lvaTable[calliSlot];
+ varDsc->lvVerTypeInfo = tiRetVal;
+ impAssignTempGen(calliSlot, call, clsHnd, (unsigned)CHECK_SPILL_NONE);
+ // impAssignTempGen can change src arg list and return type for call that returns struct.
+ var_types type = genActualType(lvaTable[calliSlot].TypeGet());
+ call = gtNewLclvNode(calliSlot, type);
+ }
+ }
// For non-candidates we must also spill, since we
// might have locals live on the eval stack that this
// call can modify.
diff --git a/src/jit/inline.def b/src/jit/inline.def
index ff0b211..2a6f5a3 100644
--- a/src/jit/inline.def
+++ b/src/jit/inline.def
@@ -39,6 +39,7 @@ INLINE_OBSERVATION(HAS_LEAVE, bool, "has leave",
INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NOINLINE_CALLEE, bool, "in corelib, noinline callee", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
@@ -55,7 +56,6 @@ INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check",
INLINE_OBSERVATION(NO_METHOD_INFO, bool, "cannot get method info", FATAL, CALLEE)
INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLEE)
INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLEE)
-INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", FATAL, CALLEE)
INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE)
INLINE_OBSERVATION(THROW_WITH_INVALID_STACK, bool, "throw with invalid stack", FATAL, CALLEE)
INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE)
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index edc4483..7332ba6 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -3513,6 +3513,12 @@ instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
{
// On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
+
+#if !defined(_TARGET_64BIT_)
+ // No 64-bit registers on x86.
+ assert((srcType != TYP_LONG) && (srcType != TYP_ULONG));
+#endif // !defined(_TARGET_64BIT_)
+
return INS_mov_i2xmm;
}
@@ -3520,6 +3526,12 @@ instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
{
// On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
+
+#if !defined(_TARGET_64BIT_)
+ // No 64-bit registers on x86.
+ assert((dstType != TYP_LONG) && (dstType != TYP_ULONG));
+#endif // !defined(_TARGET_64BIT_)
+
return INS_mov_xmm2i;
}
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 4317334..8ab3a84 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -320,6 +320,9 @@ INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
+INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
+INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
+INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 220294f..05b154e 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -416,14 +416,6 @@ typedef ptrdiff_t ssize_t;
//=============================================================================
-#define FANCY_ARRAY_OPT 0 // optimize more complex index checks
-
-//=============================================================================
-
-#define LONG_ASG_OPS 0 // implementation isn't complete yet
-
-//=============================================================================
-
#define OPT_MULT_ADDSUB 1 // optimize consecutive "lclVar += or -= icon"
#define OPT_BOOL_OPS 1 // optimize boolean operations
@@ -699,11 +691,7 @@ inline unsigned int unsigned_abs(int x)
#ifdef _TARGET_64BIT_
inline size_t unsigned_abs(ssize_t x)
{
-#ifndef FEATURE_PAL
return ((size_t)abs(x));
-#else // !FEATURE_PAL
- return ((size_t)labs(x));
-#endif // !FEATURE_PAL
}
#endif // _TARGET_64BIT_
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
index 6c0474a..8749b80 100644
--- a/src/jit/jit.settings.targets
+++ b/src/jit/jit.settings.targets
@@ -95,9 +95,11 @@
<ItemGroup Condition="'$(TargetArch)'=='i386'">
<CppCompile Include="..\emitXArch.cpp" />
<CppCompile Include="..\TargetX86.cpp" />
+ <CppCompile Include="..\unwindx86.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\stackfp.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraxarch.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenXArch.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMD.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMDCodeGenXArch.cpp" />
@@ -107,6 +109,7 @@
<CppCompile Include="..\emitXArch.cpp" />
<CppCompile Include="..\TargetAmd64.cpp" />
<CppCompile Include="..\LowerXArch.cpp" />
+ <CppCompile Include="..\lsraxarch.cpp" />
<CppCompile Include="..\CodeGenXArch.cpp" />
<CppCompile Include="..\SIMD.cpp" />
<CppCompile Include="..\SIMDCodeGenXArch.cpp" />
@@ -118,6 +121,7 @@
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\registerfp.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraarm.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArm.cpp" />
<CppCompile Include="..\unwindArm.cpp" />
</ItemGroup>
@@ -126,6 +130,7 @@
<CppCompile Include="..\emitarm64.cpp" />
<CppCompile Include="..\TargetArm64.cpp" />
<CppCompile Include="..\LowerArm64.cpp" />
+ <CppCompile Include="..\lsraarm64.cpp" />
<CppCompile Include="..\CodeGenArm64.cpp" />
<CppCompile Include="..\unwindArm.cpp" />
<CppCompile Include="..\unwindArm64.cpp" />
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 39a2505..4623fe8 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -204,13 +204,14 @@ CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the Alt
CONFIG_INTEGER(EnableSSE3_4, W("EnableSSE3_4"), 1) // Enable SSE3, SSSE3, SSE 4.1 and 4.2 instruction set as default
#endif
-#if defined(_TARGET_AMD64_)
-CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default.
-// When both AVX and SSE3_4 are set, we will use the most capable instruction set available
-// which will prefer AVX over SSE3/4.
-#else // !defined(_TARGET_AMD64_)
-CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default
-#endif // defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
+// Enable AVX instruction set for wide operations as default. When both AVX and SSE3_4 are set, we will use the most
+// capable instruction set available which will prefer AVX over SSE3/4.
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1)
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_X86_)
+// Enable AVX instruction set for wide operations as default
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0)
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_X86_)
#if !defined(DEBUG) && !defined(_DEBUG)
CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
@@ -274,6 +275,16 @@ CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0)
+#if defined(DEBUG)
+#if defined(FEATURE_CORECLR)
+CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 1)
+CONFIG_INTEGER(JitEnableRemoveEmptyTry, W("JitEnableRemoveEmptyTry"), 1)
+#else
+CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 0)
+CONFIG_INTEGER(JitEnableRemoveEmptyTry, W("JitEnableRemoveEmptyTry"), 0)
+#endif // defined(FEATURE_CORECLR)
+#endif // DEBUG
+
#undef CONFIG_INTEGER
#undef CONFIG_STRING
#undef CONFIG_METHODSET
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
index 4b3ceae..2d0eee3 100644
--- a/src/jit/jiteh.cpp
+++ b/src/jit/jiteh.cpp
@@ -93,7 +93,7 @@ bool EHblkDsc::HasFinallyHandler()
bool EHblkDsc::HasFaultHandler()
{
- return ebdHandlerType == EH_HANDLER_FAULT;
+ return (ebdHandlerType == EH_HANDLER_FAULT) || (ebdHandlerType == EH_HANDLER_FAULT_WAS_FINALLY);
}
bool EHblkDsc::HasFinallyOrFaultHandler()
@@ -2426,6 +2426,11 @@ bool Compiler::fgNormalizeEHCase2()
// this once per dup.
fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
+ // Need to adjust ref counts here since we're retargeting edges.
+ newTryStart->bbRefs++;
+ assert(insertBeforeBlk->countOfInEdges() > 0);
+ insertBeforeBlk->bbRefs--;
+
#ifdef DEBUG
if (verbose)
{
diff --git a/src/jit/jiteh.h b/src/jit/jiteh.h
index 5731162..502d215 100644
--- a/src/jit/jiteh.h
+++ b/src/jit/jiteh.h
@@ -27,7 +27,8 @@ enum EHHandlerType
EH_HANDLER_CATCH = 0x1, // Don't use zero (to aid debugging uninitialized memory)
EH_HANDLER_FILTER,
EH_HANDLER_FAULT,
- EH_HANDLER_FINALLY
+ EH_HANDLER_FINALLY,
+ EH_HANDLER_FAULT_WAS_FINALLY
};
// ToCORINFO_EH_CLAUSE_FLAGS: Convert an internal EHHandlerType to a CORINFO_EH_CLAUSE_FLAGS value
@@ -41,6 +42,7 @@ inline CORINFO_EH_CLAUSE_FLAGS ToCORINFO_EH_CLAUSE_FLAGS(EHHandlerType type)
case EH_HANDLER_FILTER:
return CORINFO_EH_CLAUSE_FILTER;
case EH_HANDLER_FAULT:
+ case EH_HANDLER_FAULT_WAS_FINALLY:
return CORINFO_EH_CLAUSE_FAULT;
case EH_HANDLER_FINALLY:
return CORINFO_EH_CLAUSE_FINALLY;
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index ea9c573..b4e4cc6 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -465,7 +465,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
varDsc->lvArgReg = genMapIntRegArgNumToRegNum(retBuffArgNum);
}
-#if FEATURE_MULTIREG__ARGS
+#if FEATURE_MULTIREG_ARGS
varDsc->lvOtherArgReg = REG_NA;
#endif
varDsc->setPrefReg(varDsc->lvArgReg, this);
@@ -488,6 +488,16 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
varDsc->lvType = TYP_I_IMPL;
}
}
+#ifdef FEATURE_SIMD
+ else if (featureSIMD && varTypeIsSIMD(info.compRetType))
+ {
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType =
+ getBaseTypeAndSizeOfSIMDType(info.compMethodInfo->args.retTypeClass, &varDsc->lvExactSize);
+ assert(varDsc->lvBaseType != TYP_UNKNOWN);
+ }
+#endif // FEATURE_SIMD
+
assert(isValidIntArgReg(varDsc->lvArgReg));
#ifdef DEBUG
@@ -1059,7 +1069,7 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
varDsc->lvIsRegArg = 1;
varDsc->lvArgReg = genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL);
-#if FEATURE_MULTIREG__ARGS
+#if FEATURE_MULTIREG_ARGS
varDsc->lvOtherArgReg = REG_NA;
#endif
varDsc->setPrefReg(varDsc->lvArgReg, this);
@@ -1414,9 +1424,16 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
if (typeHnd != StructPromotionInfo->typeHnd)
{
- // sizeof(double) represents the size of the largest primitive type that we can struct promote
- // In the future this may be changing to XMM_REGSIZE_BYTES
- const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); // must be a compile time constant
+ // sizeof(double) represents the size of the largest primitive type that we can struct promote.
+ // In the future this may be changing to XMM_REGSIZE_BYTES.
+ // Note: MaxOffset is used below to declare a local array, and therefore must be a compile-time constant.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef FEATURE_SIMD
+ // This will allow promotion of 2 Vector<T> fields on AVX2, or 4 Vector<T> fields on SSE2.
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * XMM_REGSIZE_BYTES;
+#else // !FEATURE_SIMD
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
+#endif // !FEATURE_SIMD
assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized
assert((BYTE)MAX_NumOfFieldsInPromotableStruct ==
@@ -1507,13 +1524,31 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
var_types varType = JITtype2varType(corType);
pFieldInfo->fldType = varType;
- pFieldInfo->fldSize = genTypeSize(varType);
+ unsigned size = genTypeSize(varType);
+ pFieldInfo->fldSize = size;
if (varTypeIsGC(varType))
{
containsGCpointers = true;
}
+#ifdef FEATURE_SIMD
+ // Check to see if this is a SIMD type.
+ // We will only check this if we have already found a SIMD type, which will be true if
+ // we have encountered any SIMD intrinsics.
+ if (usesSIMDTypes() && (pFieldInfo->fldSize == 0) && isSIMDClass(pFieldInfo->fldTypeHnd))
+ {
+ unsigned simdSize;
+ var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(pFieldInfo->fldTypeHnd, &simdSize);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ varType = getSIMDTypeForSize(simdSize);
+ pFieldInfo->fldType = varType;
+ pFieldInfo->fldSize = simdSize;
+ }
+ }
+#endif // FEATURE_SIMD
+
if (pFieldInfo->fldSize == 0)
{
// Non-primitive struct field. Don't promote.
@@ -1556,8 +1591,10 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
#endif // _TARGET_ARM_
}
- // If we saw any GC pointer fields above then the CORINFO_FLG_CONTAINS_GC_PTR has to be set!
- noway_assert((containsGCpointers == false) || ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
+ // If we saw any GC pointer or by-ref fields above then CORINFO_FLG_CONTAINS_GC_PTR or
+ // CORINFO_FLG_CONTAINS_STACK_PTR has to be set!
+ noway_assert((containsGCpointers == false) ||
+ ((typeFlags & (CORINFO_FLG_CONTAINS_GC_PTR | CORINFO_FLG_CONTAINS_STACK_PTR)) != 0));
// If we have "Custom Layout" then we might have an explicit Size attribute
// Managed C++ uses this for its structs, such C++ types will not contain GC pointers.
@@ -1683,7 +1720,7 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
{
lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[index];
- if (varTypeIsFloating(pFieldInfo->fldType))
+ if (varTypeIsFloating(pFieldInfo->fldType) || varTypeIsSIMD(pFieldInfo->fldType))
{
lvaTable[lclNum].lvContainsFloatingFields = 1;
// Whenever we promote a struct that contains a floating point field
@@ -1727,12 +1764,32 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
fieldVarDsc->lvIsRegArg = true;
fieldVarDsc->lvArgReg = varDsc->lvArgReg;
fieldVarDsc->setPrefReg(varDsc->lvArgReg, this); // Set the preferred register
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
+ if (varTypeIsSIMD(fieldVarDsc))
+ {
+ // This field is a SIMD type, and will be considered to be passed in multiple registers
+ // if the parent struct was. Note that this code relies on the fact that if there is
+ // a SIMD field of an enregisterable struct, it is the only field.
+ // We will assert that, in case future changes are made to the ABI.
+ assert(varDsc->lvFieldCnt == 1);
+ fieldVarDsc->lvOtherArgReg = varDsc->lvOtherArgReg;
+ }
+#endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
lvaMarkRefsWeight = BB_UNITY_WEIGHT; // incRefCnts can use this compiler global variable
fieldVarDsc->incRefCnts(BB_UNITY_WEIGHT, this); // increment the ref count for prolog initialization
}
#endif
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(pFieldInfo->fldType))
+ {
+ // Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields.
+ fieldVarDsc->lvExactSize = 0;
+ lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true);
+ }
+#endif // FEATURE_SIMD
+
#ifdef DEBUG
// This temporary should not be converted to a double in stress mode,
// because we introduce assigns to it after the stress conversion
@@ -1947,14 +2004,14 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc)
if (howToPassStruct == SPK_ByValueAsHfa)
{
- assert(type = TYP_STRUCT);
+ assert(type == TYP_STRUCT);
return true;
}
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
if (howToPassStruct == SPK_ByValue)
{
- assert(type = TYP_STRUCT);
+ assert(type == TYP_STRUCT);
return true;
}
#endif
@@ -2029,7 +2086,6 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
}
else
{
- assert(varDsc->lvExactSize != 0);
#if FEATURE_SIMD
assert(!varTypeIsSIMD(varDsc) || (varDsc->lvBaseType != TYP_UNKNOWN));
#endif // FEATURE_SIMD
@@ -3082,37 +3138,6 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
#endif
}
-#if FANCY_ARRAY_OPT
-
- /* Special case: assignment node */
-
- if (tree->gtOper == GT_ASG)
- {
- if (tree->gtType == TYP_INT)
- {
- unsigned lclNum1;
- LclVarDsc* varDsc1;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
-
- if (op1->gtOper != GT_LCL_VAR)
- return;
-
- lclNum1 = op1->gtLclVarCommon.gtLclNum;
- noway_assert(lclNum1 < lvaCount);
- varDsc1 = lvaTable + lclNum1;
-
- if (varDsc1->lvAssignOne)
- varDsc1->lvAssignTwo = true;
- else
- varDsc1->lvAssignOne = true;
- }
-
- return;
- }
-
-#endif
-
#ifdef _TARGET_XARCH_
/* Special case: integer shift node by a variable amount */
@@ -5750,6 +5775,7 @@ void Compiler::lvaAlignFrame()
#elif defined(_TARGET_X86_)
+#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
@@ -5760,6 +5786,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
+#endif
+
+ if (STACK_ALIGN > REGSIZE_BYTES)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add the maximum pad that we could ever have (which is 12)
+ lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
+ }
+
+ // Align the stack with STACK_ALIGN value.
+ int adjustFrameSize = compLclFrameSize;
+#if defined(UNIX_X86_ABI)
+ // We need to account for the pushed callee-saved register(s), plus the return address and/or EBP.
+ int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
+ adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
+#endif
+ if ((adjustFrameSize % STACK_ALIGN) != 0)
+ {
+ lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
+ }
+ }
#else
NYI("TARGET specific lvaAlignFrame");
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
index 35dd181..6eb8a49 100644
--- a/src/jit/lir.cpp
+++ b/src/jit/lir.cpp
@@ -1494,9 +1494,13 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
}
else if (!def->IsValue())
{
- // Calls may contain "uses" of nodes that do not produce a value. This is an artifact of
- // the HIR and should probably be fixed, but doing so is an unknown amount of work.
- assert(node->OperGet() == GT_CALL);
+ // Stack arguments do not produce a value, but they are considered children of the call.
+ // It may be useful to remove these from being call operands, but that may also impact
+ // other code that relies on being able to reach all the operands from a call node.
+ // The GT_NOP case arises because we sometimes eliminate stack argument stores as dead,
+ // but instead of removing them we replace them with a NOP.
+ assert((node->OperGet() == GT_CALL) &&
+ (def->OperIsStore() || (def->OperGet() == GT_PUTARG_STK) || (def->OperGet() == GT_NOP)));
continue;
}
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 423d72b..c666318 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -19,34 +19,15 @@
*
* Helper for Compiler::fgPerBlockLocalVarLiveness().
* The goal is to compute the USE and DEF sets for a basic block.
- * However with the new improvement to the data flow analysis (DFA),
- * we do not mark x as used in x = f(x) when there are no side effects in f(x).
- * 'asgdLclVar' is set when 'tree' is part of an expression with no side-effects
- * which is assigned to asgdLclVar, ie. asgdLclVar = (... tree ...)
*/
-void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
+void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
{
- bool rhsUSEDEF = false;
- unsigned lclNum;
- unsigned lhsLclNum;
- LclVarDsc* varDsc;
+ assert((tree->OperIsLocal() && (tree->OperGet() != GT_PHI_ARG)) || tree->OperIsLocalAddr());
- noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_LCL_FLD ||
- tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_VAR ||
- tree->gtOper == GT_STORE_LCL_FLD);
-
- if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR)
- {
- lclNum = tree->gtLclNum;
- }
- else
- {
- noway_assert(tree->OperIsLocalField());
- lclNum = tree->gtLclFld.gtLclNum;
- }
+ const unsigned lclNum = tree->gtLclNum;
+ assert(lclNum < lvaCount);
- noway_assert(lclNum < lvaCount);
- varDsc = lvaTable + lclNum;
+ LclVarDsc* const varDsc = &lvaTable[lclNum];
// We should never encounter a reference to a lclVar that has a zero refCnt.
if (varDsc->lvRefCnt == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
@@ -56,121 +37,80 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
varDsc->lvRefCnt = 1;
}
- // NOTE: the analysis done below is neither necessary nor correct for LIR: it depends on
- // the nodes that precede `asgdLclVar` in execution order to factor into the dataflow for the
- // value being assigned to the local var, which is not necessarily the case without tree
- // order. Furthermore, LIR is always traversed in an order that reflects the dataflow for the
- // block.
- if (asgdLclVar != nullptr)
- {
- assert(!compCurBB->IsLIR());
-
- /* we have an assignment to a local var : asgdLclVar = ... tree ...
- * check for x = f(x) case */
+ const bool isDef = (tree->gtFlags & GTF_VAR_DEF) != 0;
+ const bool isUse = !isDef || ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) != 0);
- noway_assert(asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR);
- noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+ if (varDsc->lvTracked)
+ {
+ assert(varDsc->lvVarIndex < lvaTrackedCount);
- lhsLclNum = asgdLclVar->gtLclVarCommon.gtLclNum;
+ // We don't treat stores to tracked locals as modifications of ByrefExposed memory;
+ // make sure no tracked local is addr-exposed, so that we don't incorrectly CSE byref
+ // loads that alias it across a store to it.
+ assert(!varDsc->lvAddrExposed);
- if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar))
+ if (isUse && !VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
{
- /* bingo - we have an x = f(x) case */
- asgdLclVar->gtFlags |= GTF_VAR_USEDEF;
- rhsUSEDEF = true;
+ // This is an exposed use; add it to the set of uses.
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
}
- }
- /* Is this a tracked variable? */
-
- if (varDsc->lvTracked)
- {
- noway_assert(varDsc->lvVarIndex < lvaTrackedCount);
-
- if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ if (isDef)
{
- // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ // This is a def, add it to the set of defs.
VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
}
- else
+ }
+ else
+ {
+ if (varDsc->lvAddrExposed)
{
- // if (!(fgCurDefSet & bitMask))
- // {
- // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
- // printTreeID(tree);
- // printf("\n");
- // }
-
- /* We have the following scenarios:
- * 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
- * 2. "x = ... x ..." - the LHS x is flagged GTF_VAR_USEDEF,
- * the RHS x is has rhsUSEDEF = true
- * (both set by the code above)
- *
- * We should not mark an USE of x in the above cases provided the value "x" is not used
- * further up in the tree. For example "while (i++)" is required to mark i as used.
- */
+ // Reflect the effect on ByrefExposed memory
- /* make sure we don't include USEDEF variables in the USE set
- * The first test is for LSH, the second (!rhsUSEDEF) is for any var in the RHS */
-
- if ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ if (isUse)
{
- /* Not a special flag - check to see if used to assign to itself */
-
- if (rhsUSEDEF)
- {
- /* assign to itself - do not include it in the USE set */
- if (!opts.MinOpts() && !opts.compDbgCode)
- {
- return;
- }
- }
+ fgCurMemoryUse |= memoryKindSet(ByrefExposed);
}
-
- /* Fall through for the "good" cases above - add the variable to the USE set */
-
- if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ if (isDef)
{
- VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
- }
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
- // For defs, also add to the (all) def set.
- if ((tree->gtFlags & GTF_VAR_DEF) != 0)
- {
- VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
}
}
- }
- else if (varTypeIsStruct(varDsc))
- {
- noway_assert(!varDsc->lvTracked);
- lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
-
- if (promotionType != PROMOTION_TYPE_NONE)
+ if (varTypeIsStruct(varDsc))
{
- VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
- for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ if (promotionType != PROMOTION_TYPE_NONE)
{
- noway_assert(lvaTable[i].lvIsStructField);
- if (lvaTable[i].lvTracked)
+ VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
{
- noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
- VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ }
}
- }
- // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
- if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
- {
- VarSetOps::UnionD(this, fgCurDefSet, bitMask);
- }
- else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
- {
- // Mark as used any struct fields that are not yet defined.
- VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
+ if (!isUse)
+ {
+ assert(isDef);
+ VarSetOps::UnionD(this, fgCurDefSet, bitMask);
+ }
+ else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
+ {
+ // Mark as used any struct fields that are not yet defined.
+ VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ }
}
}
}
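
At the heart of the rewritten fgMarkUseDef is the derivation of the def/use classification from node flags: GTF_VAR_DEF marks a def, and a def is also a use when GTF_VAR_USEASG or GTF_VAR_USEDEF is set (e.g. "x += 1"). A standalone sketch of that classification, using stand-in flag values rather than the JIT's actual bit assignments:

    #include <cstdio>

    // Stand-in flag bits; the real GTF_* values differ.
    const unsigned GTF_VAR_DEF    = 0x1;
    const unsigned GTF_VAR_USEASG = 0x2;
    const unsigned GTF_VAR_USEDEF = 0x4;

    void Classify(unsigned flags, bool* isDef, bool* isUse)
    {
        *isDef = (flags & GTF_VAR_DEF) != 0;
        *isUse = !*isDef || ((flags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) != 0);
    }

    int main()
    {
        bool isDef, isUse;

        Classify(0, &isDef, &isUse); // plain read: use only
        std::printf("read:   def=%d use=%d\n", isDef, isUse);

        Classify(GTF_VAR_DEF, &isDef, &isUse); // "x = ...": pure def
        std::printf("store:  def=%d use=%d\n", isDef, isUse);

        Classify(GTF_VAR_DEF | GTF_VAR_USEASG, &isDef, &isUse); // "x += ...": def and use
        std::printf("update: def=%d use=%d\n", isDef, isUse);
        return 0;
    }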
@@ -285,18 +225,15 @@ void Compiler::fgLocalVarLivenessInit()
#ifndef LEGACY_BACKEND
//------------------------------------------------------------------------
// fgPerNodeLocalVarLiveness:
-// Set fgCurHeapUse and fgCurHeapDef when the global heap is read or updated
+// Set fgCurMemoryUse and fgCurMemoryDef when memory is read or updated
// Call fgMarkUseDef for any Local variables encountered
//
// Arguments:
// tree - The current node.
-// asgdLclVar - Either nullptr or the assignement's left-hand-side GT_LCL_VAR.
-// Used as an argument to fgMarkUseDef(); only valid for HIR blocks.
//
-void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
+void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
{
assert(tree != nullptr);
- assert(asgdLclVar == nullptr || !compCurBB->IsLIR());
switch (tree->gtOper)
{
@@ -312,42 +249,43 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
case GT_LCL_FLD_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(tree->AsLclVarCommon());
break;
case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
- // and allows for a CSE of a subsequent non-volatile read
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed.
+ // See comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of memory
+ // and allows for a CSE of a subsequent non-volatile read.
if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
- // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a GcHeap/ByrefExposed def, when we get
+ // to the assignment.
// Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
{
- fgCurHeapUse = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
case GT_IND:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
+ // This models Volatile reads as def-then-use of memory.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of the GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
+ // as a memory def, when we get to assignment.
// Otherwise, we treat it as a use here.
if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
{
@@ -356,16 +294,13 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
else
{
// Defines a local addr
assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
}
}
break;
@@ -376,25 +311,22 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
unreached();
break;
- // We'll assume these are use-then-defs of the heap.
+ // We'll assume these are use-then-defs of memory.
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
break;
case GT_MEMORYBARRIER:
- // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
+ // Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
break;
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ // For now, all calls read/write GcHeap/ByrefExposed, with writes havocking them in their entirety. Might tighten this case later.
case GT_CALL:
{
GenTreeCall* call = tree->AsCall();
@@ -410,12 +342,9 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
}
if (modHeap)
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
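
The fgCurHeap* booleans become small bit sets over memory kinds, so GcHeap and ByrefExposed can be tracked together or separately. A sketch of one possible representation; the names are borrowed from the diff, but the definitions here are simplified assumptions (the real ones live elsewhere in this change):

    #include <cstdio>

    enum MemoryKind
    {
        ByrefExposed = 0,
        GcHeap       = 1,
        MemoryKindCount
    };

    typedef unsigned MemoryKindSet;

    const MemoryKindSet emptyMemoryKindSet = 0;
    const MemoryKindSet fullMemoryKindSet  = (1u << MemoryKindCount) - 1;

    inline MemoryKindSet memoryKindSet(MemoryKind kind)
    {
        return 1u << kind;
    }

    inline MemoryKindSet memoryKindSet(MemoryKind kind1, MemoryKind kind2)
    {
        return (1u << kind1) | (1u << kind2);
    }

    int main()
    {
        MemoryKindSet use = emptyMemoryKindSet;
        MemoryKindSet def = emptyMemoryKindSet;

        // A volatile indirection: def-then-use of both kinds of memory.
        def |= memoryKindSet(GcHeap, ByrefExposed);
        use |= memoryKindSet(GcHeap, ByrefExposed);

        // A store to an address-exposed local touches ByrefExposed only.
        def |= memoryKindSet(ByrefExposed);

        std::printf("use=0x%x def=0x%x full=0x%x\n", use, def, fullMemoryKindSet);
        return 0;
    }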
@@ -451,35 +380,32 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
default:
- // Determine whether it defines a heap location.
+ // Determine what memory locations it defines.
if (tree->OperIsAssignment() || tree->OperIsBlkOp())
{
GenTreeLclVarCommon* dummyLclVarTree = nullptr;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ if (tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
+ {
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
+
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
+ }
+ }
+ else
{
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
+ // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
}
break;
}
}
-void Compiler::fgPerStatementLocalVarLiveness(GenTree* startNode, GenTree* asgdLclVar)
-{
- // The startNode must be the 1st node of the statement.
- assert(startNode == compCurStmt->gtStmt.gtStmtList);
-
- // The asgdLclVar node must be either nullptr or a GT_LCL_VAR or GT_STORE_LCL_VAR
- assert((asgdLclVar == nullptr) || (asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR));
-
- // We always walk every node in statement list
- for (GenTreePtr node = startNode; node != nullptr; node = node->gtNext)
- {
- fgPerNodeLocalVarLiveness(node, asgdLclVar);
- }
-}
-
#endif // !LEGACY_BACKEND
/*****************************************************************************/
@@ -524,10 +450,10 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::Assign(this, block->bbVarDef, liveAll);
VarSetOps::Assign(this, block->bbLiveIn, liveAll);
VarSetOps::Assign(this, block->bbLiveOut, liveAll);
- block->bbHeapUse = true;
- block->bbHeapDef = true;
- block->bbHeapLiveIn = true;
- block->bbHeapLiveOut = true;
+ block->bbMemoryUse = fullMemoryKindSet;
+ block->bbMemoryDef = fullMemoryKindSet;
+ block->bbMemoryLiveIn = fullMemoryKindSet;
+ block->bbMemoryLiveOut = fullMemoryKindSet;
switch (block->bbJumpKind)
{
@@ -540,6 +466,11 @@ void Compiler::fgPerBlockLocalVarLiveness()
break;
}
}
+
+ // In minopts, we don't explicitly build SSA or value-number; GcHeap and
+ // ByrefExposed implicitly (conservatively) change state at each instr.
+ byrefStatesMatchGcHeapStates = true;
+
return;
}
@@ -549,77 +480,34 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::AssignNoCopy(this, fgCurUseSet, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, fgCurDefSet, VarSetOps::MakeEmpty(this));
+ // GC Heap and ByrefExposed can share states unless we see a def of byref-exposed
+ // memory that is not a GC Heap def.
+ byrefStatesMatchGcHeapStates = true;
+
for (block = fgFirstBB; block; block = block->bbNext)
{
- GenTreePtr stmt;
- GenTreePtr tree;
- GenTreePtr asgdLclVar;
-
VarSetOps::ClearD(this, fgCurUseSet);
VarSetOps::ClearD(this, fgCurDefSet);
- fgCurHeapUse = false;
- fgCurHeapDef = false;
- fgCurHeapHavoc = false;
+ fgCurMemoryUse = emptyMemoryKindSet;
+ fgCurMemoryDef = emptyMemoryKindSet;
+ fgCurMemoryHavoc = emptyMemoryKindSet;
compCurBB = block;
-
if (!block->IsLIR())
{
- for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ for (GenTreeStmt* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
{
- noway_assert(stmt->gtOper == GT_STMT);
-
compCurStmt = stmt;
- asgdLclVar = nullptr;
- tree = stmt->gtStmt.gtStmtExpr;
- noway_assert(tree);
-
- // The following code checks if we have an assignment expression
- // which may become a GTF_VAR_USEDEF - x=f(x).
- // consider if LHS is local var - ignore if RHS contains SIDE_EFFECTS
-
- if ((tree->gtOper == GT_ASG && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) ||
- tree->gtOper == GT_STORE_LCL_VAR)
- {
- noway_assert(tree->gtOp.gtOp1);
- GenTreePtr rhsNode;
- if (tree->gtOper == GT_ASG)
- {
- noway_assert(tree->gtOp.gtOp2);
- asgdLclVar = tree->gtOp.gtOp1;
- rhsNode = tree->gtOp.gtOp2;
- }
- else
- {
- asgdLclVar = tree;
- rhsNode = tree->gtOp.gtOp1;
- }
-
- // If this is an assignment to local var with no SIDE EFFECTS,
- // set asgdLclVar so that genMarkUseDef will flag potential
- // x=f(x) expressions as GTF_VAR_USEDEF.
- // Reset the flag before recomputing it - it may have been set before,
- // but subsequent optimizations could have removed the rhs reference.
- asgdLclVar->gtFlags &= ~GTF_VAR_USEDEF;
- if ((rhsNode->gtFlags & GTF_SIDE_EFFECT) == 0)
- {
- noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
- }
- else
- {
- asgdLclVar = nullptr;
- }
- }
-
#ifdef LEGACY_BACKEND
- tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, NULL, asgdLclVar);
-
- // We must have walked to the end of this statement.
- noway_assert(!tree);
+ GenTree* tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmtList, nullptr);
+ assert(tree == nullptr);
#else // !LEGACY_BACKEND
- fgPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, asgdLclVar);
+ for (GenTree* node = stmt->gtStmtList; node != nullptr; node = node->gtNext)
+ {
+ fgPerNodeLocalVarLiveness(node);
+ }
#endif // !LEGACY_BACKEND
}
}
@@ -628,13 +516,9 @@ void Compiler::fgPerBlockLocalVarLiveness()
#ifdef LEGACY_BACKEND
unreached();
#else // !LEGACY_BACKEND
- // NOTE: the `asgdLclVar` analysis done above is not correct for LIR: it depends
- // on all of the nodes that precede `asgdLclVar` in execution order to factor into the
- // dataflow for the value being assigned to the local var, which is not necessarily the
- // case without tree order. As a result, we simply pass `nullptr` for `asgdLclVar`.
for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
{
- fgPerNodeLocalVarLiveness(node, nullptr);
+ fgPerNodeLocalVarLiveness(node);
}
#endif // !LEGACY_BACKEND
}
@@ -667,19 +551,25 @@ void Compiler::fgPerBlockLocalVarLiveness()
printf("BB%02u", block->bbNum);
printf(" USE(%d)=", VarSetOps::Count(this, fgCurUseSet));
lvaDispVarSet(fgCurUseSet, allVars);
- if (fgCurHeapUse)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((fgCurMemoryUse & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n DEF(%d)=", VarSetOps::Count(this, fgCurDefSet));
lvaDispVarSet(fgCurDefSet, allVars);
- if (fgCurHeapDef)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
- }
- if (fgCurHeapHavoc)
- {
- printf("*");
+ if ((fgCurMemoryDef & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
+ if ((fgCurMemoryHavoc & memoryKindSet(memoryKind)) != 0)
+ {
+ printf("*");
+ }
}
printf("\n\n");
}
@@ -687,15 +577,23 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::Assign(this, block->bbVarUse, fgCurUseSet);
VarSetOps::Assign(this, block->bbVarDef, fgCurDefSet);
- block->bbHeapUse = fgCurHeapUse;
- block->bbHeapDef = fgCurHeapDef;
- block->bbHeapHavoc = fgCurHeapHavoc;
+ block->bbMemoryUse = fgCurMemoryUse;
+ block->bbMemoryDef = fgCurMemoryDef;
+ block->bbMemoryHavoc = fgCurMemoryHavoc;
/* also initialize the IN set, just in case we will do multiple DFAs */
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
- block->bbHeapLiveIn = false;
+ block->bbMemoryLiveIn = emptyMemoryKindSet;
}
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("** Memory liveness computed, GcHeap states and ByrefExposed states %s\n",
+ (byrefStatesMatchGcHeapStates ? "match" : "diverge"));
+ }
+#endif // DEBUG
}
// Helper functions to mark variables live over their entire scope
@@ -1226,181 +1124,218 @@ VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block)
return liveVars;
}
-/*****************************************************************************
- *
- * This is the classic algorithm for Live Variable Analysis.
- * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
- */
-
-void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+class LiveVarAnalysis
{
- BasicBlock* block;
- bool change;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveOutFromFinally, VarSetOps::MakeEmpty(this));
-#endif // DEBUG
- bool keepAliveThis = lvaKeepAliveAndReportThis() && lvaTable[info.compThisArg].lvTracked;
+ Compiler* m_compiler;
- /* Live Variable Analysis - Backward dataflow */
+ bool m_hasPossibleBackEdge;
- bool hasPossibleBackEdge = false;
+ unsigned m_memoryLiveIn;
+ unsigned m_memoryLiveOut;
+ VARSET_TP m_liveIn;
+ VARSET_TP m_liveOut;
- do
+ LiveVarAnalysis(Compiler* compiler)
+ : m_compiler(compiler)
+ , m_hasPossibleBackEdge(false)
+ , m_memoryLiveIn(emptyMemoryKindSet)
+ , m_memoryLiveOut(emptyMemoryKindSet)
+ , m_liveIn(VarSetOps::MakeEmpty(compiler))
+ , m_liveOut(VarSetOps::MakeEmpty(compiler))
{
- change = false;
-
- /* Visit all blocks and compute new data flow values */
-
- VARSET_TP VARSET_INIT_NOCOPY(liveIn, VarSetOps::MakeEmpty(this));
- VARSET_TP VARSET_INIT_NOCOPY(liveOut, VarSetOps::MakeEmpty(this));
-
- bool heapLiveIn = false;
- bool heapLiveOut = false;
+ }
- for (block = fgLastBB; block; block = block->bbPrev)
+ bool PerBlockAnalysis(BasicBlock* block, bool updateInternalOnly, bool keepAliveThis)
+ {
+ /* Compute the 'liveOut' set */
+ VarSetOps::ClearD(m_compiler, m_liveOut);
+ m_memoryLiveOut = emptyMemoryKindSet;
+ if (block->endsWithJmpMethod(m_compiler))
{
- // sometimes block numbers are not monotonically increasing which
- // would cause us not to identify backedges
- if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ // A JMP uses all the arguments, so mark them all
+ // as live at the JMP instruction
+ //
+ const LclVarDsc* varDscEndParams = m_compiler->lvaTable + m_compiler->info.compArgsCount;
+ for (LclVarDsc* varDsc = m_compiler->lvaTable; varDsc < varDscEndParams; varDsc++)
{
- hasPossibleBackEdge = true;
+ noway_assert(!varDsc->lvPromoted);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(m_compiler, m_liveOut, varDsc->lvVarIndex);
+ }
}
+ }
- if (updateInternalOnly)
+ // Additionally, union in all the live-in tracked vars of successors.
+ AllSuccessorIter succsEnd = block->GetAllSuccs(m_compiler).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(m_compiler).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ VarSetOps::UnionD(m_compiler, m_liveOut, succ->bbLiveIn);
+ m_memoryLiveOut |= (*succs)->bbMemoryLiveIn;
+ if (succ->bbNum <= block->bbNum)
{
- /* Only update BBF_INTERNAL blocks as they may be
- syntactically out of sequence. */
+ m_hasPossibleBackEdge = true;
+ }
+ }
- noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+ /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere
+ Note that a function may end in a throw on an infinite loop (as opposed to a return).
+ "this" has to be alive everywhere even in such methods. */
- if (!(block->bbFlags & BBF_INTERNAL))
- {
- continue;
- }
- }
+ if (keepAliveThis)
+ {
+ VarSetOps::AddElemD(m_compiler, m_liveOut, m_compiler->lvaTable[m_compiler->info.compThisArg].lvVarIndex);
+ }
- /* Compute the 'liveOut' set */
+ /* Compute the 'm_liveIn' set */
+ VarSetOps::Assign(m_compiler, m_liveIn, m_liveOut);
+ VarSetOps::DiffD(m_compiler, m_liveIn, block->bbVarDef);
+ VarSetOps::UnionD(m_compiler, m_liveIn, block->bbVarUse);
- VarSetOps::ClearD(this, liveOut);
- heapLiveOut = false;
- if (block->endsWithJmpMethod(this))
+ // Even if block->bbMemoryDef is set, we must assume that it doesn't kill memory liveness from m_memoryLiveOut,
+ // since (without proof otherwise) the use and def may touch different memory at run-time.
+ m_memoryLiveIn = m_memoryLiveOut | block->bbMemoryUse;
+
+ /* Can exceptions from this block be handled (in this function)? */
+
+ if (m_compiler->ehBlockHasExnFlowDsc(block))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, m_compiler->fgGetHandlerLiveVars(block));
+
+ VarSetOps::UnionD(m_compiler, m_liveIn, liveVars);
+ VarSetOps::UnionD(m_compiler, m_liveOut, liveVars);
+ }
+
+ /* Has there been any change in either live set? */
+
+ bool liveInChanged = !VarSetOps::Equal(m_compiler, block->bbLiveIn, m_liveIn);
+ if (liveInChanged || !VarSetOps::Equal(m_compiler, block->bbLiveOut, m_liveOut))
+ {
+ if (updateInternalOnly)
{
- // A JMP uses all the arguments, so mark them all
- // as live at the JMP instruction
- //
- const LclVarDsc* varDscEndParams = lvaTable + info.compArgsCount;
- for (LclVarDsc* varDsc = lvaTable; varDsc < varDscEndParams; varDsc++)
+ // Only "extend" liveness over BBF_INTERNAL blocks
+
+ noway_assert(block->bbFlags & BBF_INTERNAL);
+
+ liveInChanged =
+ !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveIn, m_liveIn),
+ m_liveIn);
+ if (liveInChanged ||
+ !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveOut, m_liveOut),
+ m_liveOut))
{
- noway_assert(!varDsc->lvPromoted);
- if (varDsc->lvTracked)
+#ifdef DEBUG
+ if (m_compiler->verbose)
{
- VarSetOps::AddElemD(this, liveOut, varDsc->lvVarIndex);
+ printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
+ dumpConvertedVarSet(m_compiler, VarSetOps::Diff(m_compiler, m_liveIn, block->bbLiveIn));
+ printf(", LiveOut+ ");
+ dumpConvertedVarSet(m_compiler, VarSetOps::Diff(m_compiler, m_liveOut, block->bbLiveOut));
+ printf("\n");
}
- }
- }
+#endif // DEBUG
- // Additionally, union in all the live-in tracked vars of successors.
- AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
- for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
- {
- BasicBlock* succ = (*succs);
- VarSetOps::UnionD(this, liveOut, succ->bbLiveIn);
- heapLiveOut = heapLiveOut || (*succs)->bbHeapLiveIn;
- if (succ->bbNum <= block->bbNum)
- {
- hasPossibleBackEdge = true;
+ VarSetOps::UnionD(m_compiler, block->bbLiveIn, m_liveIn);
+ VarSetOps::UnionD(m_compiler, block->bbLiveOut, m_liveOut);
}
}
-
- /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere
- Note that a function may end in a throw on an infinite loop (as opposed to a return).
- "this" has to be alive everywhere even in such methods. */
-
- if (keepAliveThis)
+ else
{
- VarSetOps::AddElemD(this, liveOut, lvaTable[info.compThisArg].lvVarIndex);
+ VarSetOps::Assign(m_compiler, block->bbLiveIn, m_liveIn);
+ VarSetOps::Assign(m_compiler, block->bbLiveOut, m_liveOut);
}
+ }
- /* Compute the 'liveIn' set */
+ const bool memoryLiveInChanged = (block->bbMemoryLiveIn != m_memoryLiveIn);
+ if (memoryLiveInChanged || (block->bbMemoryLiveOut != m_memoryLiveOut))
+ {
+ block->bbMemoryLiveIn = m_memoryLiveIn;
+ block->bbMemoryLiveOut = m_memoryLiveOut;
+ }
- VarSetOps::Assign(this, liveIn, liveOut);
- VarSetOps::DiffD(this, liveIn, block->bbVarDef);
- VarSetOps::UnionD(this, liveIn, block->bbVarUse);
+ return liveInChanged || memoryLiveInChanged;
+ }
- heapLiveIn = (heapLiveOut && !block->bbHeapDef) || block->bbHeapUse;
+ void Run(bool updateInternalOnly)
+ {
+ const bool keepAliveThis =
+ m_compiler->lvaKeepAliveAndReportThis() && m_compiler->lvaTable[m_compiler->info.compThisArg].lvTracked;
- /* Can exceptions from this block be handled (in this function)? */
+ /* Live Variable Analysis - Backward dataflow */
+ bool changed;
+ do
+ {
+ changed = false;
- if (ehBlockHasExnFlowDsc(block))
- {
- VARSET_TP VARSET_INIT_NOCOPY(liveVars, fgGetHandlerLiveVars(block));
+ /* Visit all blocks and compute new data flow values */
- VarSetOps::UnionD(this, liveIn, liveVars);
- VarSetOps::UnionD(this, liveOut, liveVars);
- }
+ VarSetOps::ClearD(m_compiler, m_liveIn);
+ VarSetOps::ClearD(m_compiler, m_liveOut);
- /* Has there been any change in either live set? */
+ m_memoryLiveIn = emptyMemoryKindSet;
+ m_memoryLiveOut = emptyMemoryKindSet;
- if (!VarSetOps::Equal(this, block->bbLiveIn, liveIn) || !VarSetOps::Equal(this, block->bbLiveOut, liveOut))
+ for (BasicBlock* block = m_compiler->fgLastBB; block; block = block->bbPrev)
{
+ // Sometimes block numbers are not monotonically increasing, which
+ // would cause us to fail to identify backedges.
+ // Sometimes block numbers are not monotonically increasing, which
+ // would cause us to fail to identify backedges.
+ if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ {
+ m_hasPossibleBackEdge = true;
+ }
+
if (updateInternalOnly)
{
- // Only "extend" liveness over BBF_INTERNAL blocks
+ /* Only update BBF_INTERNAL blocks as they may be
+ syntactically out of sequence. */
- noway_assert(block->bbFlags & BBF_INTERNAL);
+ noway_assert(m_compiler->opts.compDbgCode && (m_compiler->info.compVarScopesCount > 0));
- if (!VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveIn, liveIn), liveIn) ||
- !VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveOut, liveOut), liveOut))
+ if (!(block->bbFlags & BBF_INTERNAL))
{
-#ifdef DEBUG
- if (verbose)
- {
- printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
- dumpConvertedVarSet(this, VarSetOps::Diff(this, liveIn, block->bbLiveIn));
- printf(", LiveOut+ ");
- dumpConvertedVarSet(this, VarSetOps::Diff(this, liveOut, block->bbLiveOut));
- printf("\n");
- }
-#endif // DEBUG
-
- VarSetOps::UnionD(this, block->bbLiveIn, liveIn);
- VarSetOps::UnionD(this, block->bbLiveOut, liveOut);
- change = true;
+ continue;
}
}
- else
+
+ if (PerBlockAnalysis(block, updateInternalOnly, keepAliveThis))
{
- VarSetOps::Assign(this, block->bbLiveIn, liveIn);
- VarSetOps::Assign(this, block->bbLiveOut, liveOut);
- change = true;
+ changed = true;
}
}
-
- if ((block->bbHeapLiveIn == 1) != heapLiveIn || (block->bbHeapLiveOut == 1) != heapLiveOut)
+ // If there is no way we could have processed a block without seeing all of its predecessors,
+ // then there is no need to iterate.
+ if (!m_hasPossibleBackEdge)
{
- block->bbHeapLiveIn = heapLiveIn;
- block->bbHeapLiveOut = heapLiveOut;
- change = true;
+ break;
}
- }
- // if there is no way we could have processed a block without seeing all of its predecessors
- // then there is no need to iterate
- if (!hasPossibleBackEdge)
- {
- break;
- }
- } while (change);
+ } while (changed);
+ }
-//-------------------------------------------------------------------------
+public:
+ static void Run(Compiler* compiler, bool updateInternalOnly)
+ {
+ LiveVarAnalysis analysis(compiler);
+ analysis.Run(updateInternalOnly);
+ }
+};
-#ifdef DEBUG
+/*****************************************************************************
+ *
+ * This is the classic algorithm for Live Variable Analysis.
+ * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
+ */
+
+void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+{
+ LiveVarAnalysis::Run(this, updateInternalOnly);
+#ifdef DEBUG
if (verbose && !updateInternalOnly)
{
printf("\nBB liveness after fgLiveVarAnalysis():\n\n");
fgDispBBLiveness();
}
-
#endif // DEBUG
}
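// Editorial sketch, not part of the patch: PerBlockAnalysis above is the classic backward
// transfer function. With live sets viewed as bit masks over tracked locals:
//   liveOut(B) = union of liveIn(S) over all successors S (plus args for JMP, plus kept-alive "this")
//   liveIn(B)  = use(B) | (liveOut(B) & ~def(B))
// Minimal stand-in using a 64-bit mask instead of VARSET_TP:
typedef unsigned long long SketchVarMask;
inline SketchVarMask sketchLiveIn(SketchVarMask use, SketchVarMask def, SketchVarMask liveOut)
{
    return use | (liveOut & ~def);
}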
@@ -3090,15 +3025,21 @@ void Compiler::fgDispBBLiveness(BasicBlock* block)
printf("BB%02u", block->bbNum);
printf(" IN (%d)=", VarSetOps::Count(this, block->bbLiveIn));
lvaDispVarSet(block->bbLiveIn, allVars);
- if (block->bbHeapLiveIn)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((block->bbMemoryLiveIn & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n OUT(%d)=", VarSetOps::Count(this, block->bbLiveOut));
lvaDispVarSet(block->bbLiveOut, allVars);
- if (block->bbHeapLiveOut)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((block->bbMemoryLiveOut & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n\n");
}
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index a6e50b3..0316a34 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -167,8 +167,13 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- LowerBlockStore(node->AsBlk());
- break;
+ {
+ // TODO-Cleanup: Consider moving this code to LowerBlockStore, which is currently
+ // called from TreeNodeInfoInitBlockStore, and calling that method here.
+ GenTreeBlk* blkNode = node->AsBlk();
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+ }
+ break;
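// Editorial sketch, not part of the patch: TryCreateAddrMode attempts to fold the block store's
// address into the single [base + index*scale + offset] form that the target can encode directly
// in a memory operand. Equivalent address arithmetic, for reference:
inline unsigned char* sketchEffectiveAddress(unsigned char* base, long index, int scale, int offset)
{
    // on xarch, scale is limited to 1, 2, 4 or 8 and offset must fit the displacement field
    return base + index * scale + offset;
}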
#ifdef FEATURE_SIMD
case GT_SIMD:
@@ -236,20 +241,14 @@ GenTree* Lowering::LowerNode(GenTree* node)
unsigned varNum = node->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &comp->lvaTable[varNum];
-#if defined(_TARGET_64BIT_)
- assert(varDsc->lvSize() == 16);
- node->gtType = TYP_SIMD16;
-#else // !_TARGET_64BIT_
- if (varDsc->lvSize() == 16)
+ if (comp->lvaMapSimd12ToSimd16(varDsc))
{
+ JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
+ DISPNODE(node);
+ JITDUMP("============");
+
node->gtType = TYP_SIMD16;
}
- else
- {
- // The following assert is guaranteed by lvSize().
- assert(varDsc->lvIsParam);
- }
-#endif // !_TARGET_64BIT_
}
#endif // FEATURE_SIMD
__fallthrough;
@@ -549,7 +548,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
// If the number of possible destinations is small enough, we proceed to expand the switch
// into a series of conditional branches, otherwise we follow the jump table based switch
// transformation.
- else if (jumpCnt < minSwitchTabJumpCnt)
+ else if ((jumpCnt < minSwitchTabJumpCnt) || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50))
{
// Lower the switch into a series of compare and branch IR trees.
//
@@ -639,7 +638,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
- currentBBRange->InsertAtEnd(std::move(condRange));
+ currentBBRange->InsertAtEnd(std::move(caseRange));
}
}
@@ -757,16 +756,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
GenTreePtr putArg = nullptr;
bool updateArgTable = true;
-#if !defined(_TARGET_64BIT_)
- if (varTypeIsLong(type))
- {
- // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
- // Therefore, we don't update the arg table entry.
- updateArgTable = false;
- type = TYP_INT;
- }
-#endif // !defined(_TARGET_64BIT_)
-
bool isOnStack = true;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(type))
@@ -954,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif
+#if defined(UNIX_X86_ABI)
+ assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
+ putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
@@ -1084,25 +1078,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
NYI("Lowering of long register argument");
}
- // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
- // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
+ // Although the hi argument needs to be pushed first, that will be handled by the general case,
+ // in which the fields will be reversed.
noway_assert(arg->OperGet() == GT_LONG);
- GenTreePtr argLo = arg->gtGetOp1();
- GenTreePtr argHi = arg->gtGetOp2();
-
- GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
- GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
-
- arg->gtOp.gtOp1 = putArgLo;
- arg->gtOp.gtOp2 = putArgHi;
-
- BlockRange().InsertBefore(arg, putArgHi, putArgLo);
-
- // The execution order now looks like this:
- // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
-
- assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
- arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ assert(info->numSlots == 2);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+ GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
+ // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
+ (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+
+ // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
+ BlockRange().InsertBefore(arg, fieldList, putArg);
+ BlockRange().Remove(arg);
+ *ppArg = putArg;
}
else
#endif // !defined(_TARGET_64BIT_)
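// Editorial sketch, not part of the patch: on 32-bit targets a TYP_LONG stack argument occupies
// two INT-sized slots. The field list built above records the lo half at offset 0 and the hi half
// at offset 4; codegen walks the fields in reverse so the hi half is pushed first.
struct SketchLongArgSlots
{
    int lo; // offset 0 - first GenTreeFieldList entry
    int hi; // offset 4 - second entry, pushed first
};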
@@ -1872,6 +1863,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
bool isClosed;
LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
assert(isClosed);
+ BlockRange().Remove(std::move(secondArgRange));
argEntry->node->gtOp.gtOp1 = callTarget;
@@ -1935,251 +1927,439 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
}
//------------------------------------------------------------------------
-// Lowering::LowerCompare: lowers a compare node.
-//
-// For 64-bit targets, this doesn't do much of anything: all comparisons
-// that we support can be handled in code generation on such targets.
-//
-// For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
-// node must be lowered such that the liveness of the operands to the
-// comparison is properly visible to the rest of the backend. As such,
-// a 64-bit comparison is lowered from something like this:
-//
-// ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// /--* t6 int
-// N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+// Lowering::LowerCompare: Lowers a compare node.
//
-// N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
-//
-// /--* t7 long
-// +--* t9 long
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-// To something like this:
-//
-// ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
-// [000099] ------------ t99 = const int 0
-//
-// [000101] ------------ t101 = const int 0
-//
-// /--* t99 int
-// +--* t101 int
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-//
-// ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
-// [000105] -------N-U-- jcc void cond=<
-//
-//
-// ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// N003 ( 3, 10) [000009] ------------ t9 = const int 3
-//
-// /--* t6 int
-// +--* t9 int
-// [000106] N------N-U-- t106 = * < int
-//
-// /--* t106 int
-// [000107] ------------ * jmpTrue void
-//
-// Which will eventually generate code similar to the following:
-//
-// 33DB xor ebx, ebx
-// 85DB test ebx, ebx
-// 7707 ja SHORT G_M50523_IG04
-// 72E7 jb SHORT G_M50523_IG03
-// 83F803 cmp eax, 3
-// 72E2 jb SHORT G_M50523_IG03
+// Arguments:
+// cmp - the compare node
//
+// Notes:
+// - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
+// - Ensures that we don't have a mix of int/long operands (XARCH specific).
+// - Narrows operands to enable memory operand containment (XARCH specific).
+// - Transforms cmp(and(x, y), 0) into test(x, y) (XARCH specific but could
+// be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
+
void Lowering::LowerCompare(GenTree* cmp)
{
#ifndef _TARGET_64BIT_
- if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
- {
- return;
- }
-
LIR::Use cmpUse;
- if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+ if ((cmp->gtGetOp1()->TypeGet() == TYP_LONG) && BlockRange().TryGetUse(cmp, &cmpUse) &&
+ cmpUse.User()->OperIs(GT_JTRUE))
{
- return;
- }
+ // For 32-bit targets any comparison that feeds a `GT_JTRUE` node must be lowered such that
+ // the liveness of the operands to the comparison is properly visible to the rest of the
+ // backend. As such, a 64-bit comparison is lowered from something like this:
+ //
+ // ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // /--* t6 int
+ // N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
+ //
+ // /--* t7 long
+ // +--* t9 long
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ // To something like this:
+ //
+ // ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
+ // [000099] ------------ t99 = const int 0
+ //
+ // [000101] ------------ t101 = const int 0
+ //
+ // /--* t99 int
+ // +--* t101 int
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ //
+ // ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
+ // [000105] -------N-U-- jcc void cond=<
+ //
+ //
+ // ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = const int 3
+ //
+ // /--* t6 int
+ // +--* t9 int
+ // [000106] N------N-U-- t106 = * < int
+ //
+ // /--* t106 int
+ // [000107] ------------ * jmpTrue void
+ //
+ // Which will eventually generate code similar to the following:
+ //
+ // 33DB xor ebx, ebx
+ // 85DB test ebx, ebx
+ // 7707 ja SHORT G_M50523_IG04
+ // 72E7 jb SHORT G_M50523_IG03
+ // 83F803 cmp eax, 3
+ // 72E2 jb SHORT G_M50523_IG03
+ //
- GenTree* src1 = cmp->gtGetOp1();
- GenTree* src2 = cmp->gtGetOp2();
- unsigned weight = m_block->getBBWeight(comp);
+ GenTree* src1 = cmp->gtGetOp1();
+ GenTree* src2 = cmp->gtGetOp2();
+ unsigned weight = m_block->getBBWeight(comp);
- LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
- LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+ LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+ LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
- if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc1.ReplaceWithLclVar(comp, weight);
- }
+ // TODO-CQ-32bit: We should move more code to the new basic block; currently we're only moving
+ // constants and lclvars. In particular, it would be nice to move GT_AND nodes as that would
+ // enable the and-cmp to test transform that happens later in this function. That's not
+ // exactly ideal; the and-cmp to test transform should really run before this code, but:
+ // - it would need to run before decomposition, otherwise it won't recognize the 0 constant
+ // because after decomposition it is packed in a GT_LONG
+ // - this code would also need to handle GT_TEST_EQ/GT_TEST_NE
- if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc2.ReplaceWithLclVar(comp, weight);
- }
+ if (!loSrc1.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc1.ReplaceWithLclVar(comp, weight);
+ }
+
+ if (!loSrc2.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc2.ReplaceWithLclVar(comp, weight);
+ }
- BasicBlock* jumpDest = m_block->bbJumpDest;
- BasicBlock* nextDest = m_block->bbNext;
- BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+ BasicBlock* jumpDest = m_block->bbJumpDest;
+ BasicBlock* nextDest = m_block->bbNext;
+ BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
- cmp->gtType = TYP_INT;
- cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
- cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
+ cmp->gtType = TYP_INT;
+ cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
+ cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
- if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
- {
- // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
- // bits and one for the lower 32 bits. As such, we update the flow graph like so:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: cond(hi)
- // / \
- // false true
- // | |
- // | BB3: cond(lo)
- // | / \
- // | false true
- // \ / |
- // BB1 BB2
- //
+ if (cmp->OperIs(GT_EQ, GT_NE))
+ {
+ // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
+ // bits and one for the lower 32 bits. As such, we update the flow graph like so:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: cond(hi)
+ // / \
+ // false true
+ // | |
+ // | BB3: cond(lo)
+ // | / \
+ // | false true
+ // \ / |
+ // BB1 BB2
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
- m_block->bbJumpKind = BBJ_COND;
+ m_block->bbJumpKind = BBJ_COND;
+
+ if (cmp->OperIs(GT_EQ))
+ {
+ cmp->gtOper = GT_NE;
+ m_block->bbJumpDest = nextDest;
+ nextDest->bbFlags |= BBF_JMP_TARGET;
+ comp->fgAddRefPred(nextDest, m_block);
+ }
+ else
+ {
+ m_block->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, m_block);
+ }
- if (cmp->OperGet() == GT_EQ)
+ assert(newBlock->bbJumpKind == BBJ_COND);
+ assert(newBlock->bbJumpDest == jumpDest);
+ }
+ else
{
- cmp->gtOper = GT_NE;
+ // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and
+ // one comparison for the lower 32 bits. We update the flowgraph as such:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: (!cond(hi) && !eq(hi))
+ // / \
+ // true false
+ // | |
+ // | BB3: (cond(hi) && !eq(hi))
+ // | / \
+ // | false true
+ // | | |
+ // | BB4: cond(lo) |
+ // | / \ |
+ // | false true |
+ // \ / \ /
+ // BB1 BB2
+ //
+ //
+ // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
+ // differ based on the original condition, and all consist of a single node. The switch statement below
+ // performs the necessary mapping.
+ //
+
+ genTreeOps hiCmpOper;
+ genTreeOps loCmpOper;
+
+ switch (cmp->OperGet())
+ {
+ case GT_LT:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LT;
+ break;
+ case GT_LE:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LE;
+ break;
+ case GT_GT:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GT;
+ break;
+ case GT_GE:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GE;
+ break;
+ default:
+ unreached();
+ }
+
+ BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+
+ GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
+ hiJcc->gtFlags = cmp->gtFlags;
+ LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+
+ m_block->bbJumpKind = BBJ_COND;
m_block->bbJumpDest = nextDest;
nextDest->bbFlags |= BBF_JMP_TARGET;
comp->fgAddRefPred(nextDest, m_block);
+
+ newBlock->bbJumpKind = BBJ_COND;
+ newBlock->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, newBlock);
+
+ assert(newBlock2->bbJumpKind == BBJ_COND);
+ assert(newBlock2->bbJumpDest == jumpDest);
}
- else
+
+ BlockRange().Remove(src1);
+ BlockRange().Remove(src2);
+ }
+#endif
+
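// Editorial sketch, not part of the patch: the flowgraph surgery above implements the usual hi/lo
// decomposition of a 64-bit ordered compare on 32-bit targets. Equivalent scalar logic for the
// unsigned case (the signed case differs only in how the high halves are compared):
inline bool sketchULongLess(unsigned hi1, unsigned lo1, unsigned hi2, unsigned lo2)
{
    if (hi1 != hi2)
    {
        return hi1 < hi2; // BB0/BB3: the high halves decide unless they are equal
    }
    return lo1 < lo2; // BB4: only then is the (always unsigned) low compare evaluated
}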
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ if (cmp->gtGetOp1()->TypeGet() != cmp->gtGetOp2()->TypeGet())
+ {
+ bool op1Is64Bit = (genTypeSize(cmp->gtGetOp1()->TypeGet()) == 8);
+ bool op2Is64Bit = (genTypeSize(cmp->gtGetOp2()->TypeGet()) == 8);
+
+ if (op1Is64Bit != op2Is64Bit)
{
- m_block->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, m_block);
- }
+ //
+ // Normally this should not happen. IL allows comparing int32 to native int but the importer
+ // automatically inserts a cast from int32 to long on 64 bit architectures. However, the JIT
+ // accidentally generates int/long comparisons internally:
+ // - loop cloning compares int (and even small int) index limits against long constants
+ // - switch lowering compares a 64 bit switch value against an int32 constant
+ //
+ // TODO-Cleanup: The above mentioned issues should be fixed and then the code below may be
+ // replaced with an assert or at least simplified. The special casing of constants in code
+ // below is only necessary to prevent worse code generation for switches and loop cloning.
+ //
- assert(newBlock->bbJumpKind == BBJ_COND);
- assert(newBlock->bbJumpDest == jumpDest);
+ GenTree* longOp = op1Is64Bit ? cmp->gtOp.gtOp1 : cmp->gtOp.gtOp2;
+ GenTree** smallerOpUse = op2Is64Bit ? &cmp->gtOp.gtOp1 : &cmp->gtOp.gtOp2;
+ var_types smallerType = (*smallerOpUse)->TypeGet();
+
+ assert(genTypeSize(smallerType) < 8);
+
+ if (longOp->IsCnsIntOrI() && genTypeCanRepresentValue(smallerType, longOp->AsIntCon()->IconValue()))
+ {
+ longOp->gtType = smallerType;
+ }
+ else if ((*smallerOpUse)->IsCnsIntOrI())
+ {
+ (*smallerOpUse)->gtType = TYP_LONG;
+ }
+ else
+ {
+ GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG);
+ *smallerOpUse = cast;
+ BlockRange().InsertAfter(cast->gtGetOp1(), cast);
+ }
+ }
}
- else
+#endif // _TARGET_AMD64_
+
+ if (cmp->gtGetOp2()->IsIntegralConst())
{
- // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
- // comparison for the lower 32 bits. We update the flowgraph as such:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: (!cond(hi) && !eq(hi))
- // / \
- // true false
- // | |
- // | BB3: (cond(hi) && !eq(hi))
- // | / \
- // | false true
- // | | |
- // | BB4: cond(lo) |
- // | / \ |
- // | false true |
- // \ / \ /
- // BB1 BB2
- //
- //
- // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
- // differ based on the original condition, and all consist of a single node. The switch statement below
- // performs the necessary mapping.
- //
+ GenTree* op1 = cmp->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+ GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
+ ssize_t op2Value = op2->IconValue();
- genTreeOps hiCmpOper;
- genTreeOps loCmpOper;
+ if (op1->isMemoryOp() && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
+ {
+ //
+ // If op1's type is small then try to narrow op2 so it has the same type as op1.
+ // Small types are usually used by memory loads and if both compare operands have
+ // the same type then the memory load can be contained. In certain situations
+ // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding.
+ //
- switch (cmp->OperGet())
+ op2->gtType = op1Type;
+ }
+ else if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
{
- case GT_LT:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LT;
- break;
- case GT_LE:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LE;
- break;
- case GT_GT:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GT;
- break;
- case GT_GE:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GE;
- break;
- default:
- unreached();
+ GenTreeCast* cast = op1->AsCast();
+ var_types castToType = cast->CastToType();
+ GenTree* castOp = cast->gtGetOp1();
+
+ if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value))
+ {
+ //
+ // Since we're going to remove the cast we need to be able to narrow the cast operand
+ // to the cast type. This can be done safely only for certain opers (e.g AND, OR, XOR).
+ // Some opers just can't be narrowed (e.g DIV, MUL) while other could be narrowed but
+ // doing so would produce incorrect results (e.g. RSZ, RSH).
+ //
+ // The below list of handled opers is conservative but enough to handle the most common
+ // situations. In particular this include CALL, sometimes the JIT unnecessarilly widens
+ // the result of bool returning calls.
+ //
+
+ if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || castOp->isMemoryOp())
+ {
+ assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
+
+ castOp->gtType = castToType;
+ cmp->gtOp.gtOp1 = castOp;
+ op2->gtType = castToType;
+
+ BlockRange().Remove(cast);
+ }
+ }
}
+ else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
+ {
+ //
+ // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible.
+ //
- BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+ GenTree* andOp1 = op1->gtGetOp1();
+ GenTree* andOp2 = op1->gtGetOp2();
- GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
- hiJcc->gtFlags = cmp->gtFlags;
- LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+ if (op2Value != 0)
+ {
+ //
+ // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask)
+ // into ((x AND mask) NE|EQ 0) when mask is a single bit.
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value))
+ {
+ op2Value = 0;
+ op2->SetIconValue(0);
+ cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet()));
+ }
+ }
- m_block->bbJumpKind = BBJ_COND;
- m_block->bbJumpDest = nextDest;
- nextDest->bbFlags |= BBF_JMP_TARGET;
- comp->fgAddRefPred(nextDest, m_block);
+ if (op2Value == 0)
+ {
+ BlockRange().Remove(op1);
+ BlockRange().Remove(op2);
- newBlock->bbJumpKind = BBJ_COND;
- newBlock->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, newBlock);
+ cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
+ cmp->gtOp.gtOp1 = andOp1;
+ cmp->gtOp.gtOp2 = andOp2;
- assert(newBlock2->bbJumpKind == BBJ_COND);
- assert(newBlock2->bbJumpDest == jumpDest);
+ if (andOp1->isMemoryOp() && andOp2->IsIntegralConst())
+ {
+ //
+ // For "test" we only care about the bits that are set in the second operand (mask).
+ // If the mask fits in a small type then we can narrow both operands to generate a "test"
+ // instruction with a smaller encoding ("test" does not have a r/m32, imm8 form) and avoid
+ // a widening load in some cases.
+ //
+ // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches
+ // the behavior of a previous implementation and avoids adding more cases where we generate
+ // 16 bit instructions that require a length changing prefix (0x66). These suffer from
+ // significant decoder stalls on Intel CPUs.
+ //
+ // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help.
+ // In such cases morph narrows down the existing GT_AND by inserting a cast between it and
+ // the memory operand so we'd need to add more code to recognize and eliminate that cast.
+ //
+
+ size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue());
+
+ if (FitsIn<UINT8>(mask))
+ {
+ andOp1->gtType = TYP_UBYTE;
+ andOp2->gtType = TYP_UBYTE;
+ }
+ else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2)
+ {
+ andOp1->gtType = TYP_CHAR;
+ andOp2->gtType = TYP_CHAR;
+ }
+ }
+ }
+ }
}
- BlockRange().Remove(src1);
- BlockRange().Remove(src2);
-#endif
+ if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
+ {
+ if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
+ {
+ //
+ // If both operands have the same type then codegen will use the common operand type to
+ // determine the instruction type. For small types this would result in performing a
+ // signed comparison of two small unsigned values without zero extending them to TYP_INT,
+ // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
+ // has to generate a small comparison; it can still correctly generate a TYP_INT comparison.
+ //
+
+ cmp->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_XARCH_
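// Editorial sketch, not part of the patch: why GTF_UNSIGNED matters for small unsigned operands.
// Compared as signed bytes, 200 (0xC8) looks negative and orders below 100; compared unsigned
// (or after zero extension to TYP_INT) the ordering is correct.
inline bool sketchSignedByteCompareIsWrong()
{
    unsigned char a = 200, b = 100;
    bool asSignedBytes = (signed char)a < (signed char)b; // true - wrong for unsigned data
    bool asUnsigned    = a < b;                           // false - the intended result
    return asSignedBytes != asUnsigned;
}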
}
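// Editorial sketch, not part of the patch: GT_TEST_EQ/GT_TEST_NE map to the x86 "test"
// instruction, which computes x & y only for its flags. The single-bit-mask rewrite used above
// rests on ((x & mask) == mask) being equivalent to ((x & mask) != 0) when mask is a power of two:
inline bool sketchTestTransformHolds(unsigned x, unsigned mask)
{
    bool isPow2 = (mask != 0) && ((mask & (mask - 1)) == 0);
    return !isPow2 || (((x & mask) == mask) == ((x & mask) != 0));
}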
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
@@ -3498,18 +3678,19 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
// make sure there are not any side effects between def of leaves and use
if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
{
- JITDUMP(" No addressing mode\n");
+ JITDUMP("No addressing mode:\n ");
+ DISPNODE(addr);
return addr;
}
GenTreePtr arrLength = nullptr;
JITDUMP("Addressing mode:\n");
- JITDUMP(" Base\n");
+ JITDUMP(" Base\n ");
DISPNODE(base);
if (index != nullptr)
{
- JITDUMP(" + Index * %u + %u\n", scale, offset);
+ JITDUMP(" + Index * %u + %u\n ", scale, offset);
DISPNODE(index);
}
else
@@ -4023,12 +4204,6 @@ void Lowering::LowerStoreInd(GenTree* node)
node->AsStoreInd()->SetRMWStatusDefault();
}
-void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
-}
-
//------------------------------------------------------------------------
// LowerArrElem: Lower a GT_ARR_ELEM node
//
@@ -4303,13 +4478,12 @@ void Lowering::DoPhase()
m_block = block;
for (GenTree* node : BlockRange().NonPhiNodes())
{
-/* We increment the number position of each tree node by 2 to
-* simplify the logic when there's the case of a tree that implicitly
-* does a dual-definition of temps (the long case). In this case
-* is easier to already have an idle spot to handle a dual-def instead
-* of making some messy adjustments if we only increment the
-* number position by one.
-*/
+ // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
+ // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
+ // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
+ // increment the number position by one.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
node->gtSeqNum = currentLoc;
#endif
@@ -4633,13 +4807,8 @@ bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
}
}
-#ifdef _TARGET_64BIT_
/**
* Get common information required to handle a cast instruction
- *
- * Right now only supports 64 bit targets. In order to support 32 bit targets the
- * switch statement needs work.
- *
*/
void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
{
@@ -4675,7 +4844,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
bool signCheckOnly = false;
// Do we need to compare the value, or just check masks
-
switch (dstType)
{
default:
@@ -4709,9 +4877,13 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF80000000LL;
- typeMin = INT_MIN;
- typeMax = INT_MAX;
+#else
+ typeMask = 0x80000000;
+#endif
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
}
break;
@@ -4722,7 +4894,11 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF00000000LL;
+#else
+ typeMask = 0x00000000;
+#endif
}
break;
@@ -4746,8 +4922,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
}
-#endif // _TARGET_64BIT_
-
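// Editorial sketch, not part of the patch: the 32-bit typeMask values added above encode the same
// overflow checks as the 64-bit ones, restricted to 32-bit sources. For example, an unsigned
// 32-bit value overflows a cast to signed int exactly when its sign bit is set:
inline bool sketchUIntToIntOverflows(unsigned value)
{
    return (value & 0x80000000u) != 0; // same condition as value > INT_MAX
}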
#ifdef DEBUG
void Lowering::DumpNodeInfoMap()
{
diff --git a/src/jit/lower.h b/src/jit/lower.h
index c1cafb4..57b4127 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -45,9 +45,7 @@ public:
bool signCheckOnly; // For converting between unsigned/signed int
};
-#ifdef _TARGET_64BIT_
static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo);
-#endif // _TARGET_64BIT_
private:
#ifdef DEBUG
@@ -168,7 +166,7 @@ private:
// operands.
//
// Arguments:
- // tree - Gentree of a bininary operation.
+ // tree - Gentree of a binary operation.
//
// Returns
// None.
@@ -194,6 +192,7 @@ private:
}
}
#endif // defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitStoreLoc(GenTree* tree);
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
void TreeNodeInfoInitCall(GenTreeCall* call);
@@ -203,14 +202,26 @@ private:
void TreeNodeInfoInitLogicalOp(GenTree* tree);
void TreeNodeInfoInitModDiv(GenTree* tree);
void TreeNodeInfoInitIntrinsic(GenTree* tree);
+ void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree);
+ void TreeNodeInfoInitIndir(GenTree* indirTree);
+ void TreeNodeInfoInitGCWriteBarrier(GenTree* tree);
+#if !CPU_LOAD_STORE_ARCH
+ bool TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd);
+#endif
#ifdef FEATURE_SIMD
void TreeNodeInfoInitSIMD(GenTree* tree);
#endif // FEATURE_SIMD
void TreeNodeInfoInitCast(GenTree* tree);
#ifdef _TARGET_ARM64_
+ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
+#endif // _TARGET_ARM64_
+#ifdef _TARGET_ARM_
+ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
#endif // _TARGET_ARM64_
#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ void LowerPutArgStk(GenTreePutArgStk* tree);
void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree);
#endif // FEATURE_PUT_STRUCT_ARG_STK
void TreeNodeInfoInitLclHeap(GenTree* tree);
@@ -231,18 +242,16 @@ private:
void LowerCast(GenTree* node);
#if defined(_TARGET_XARCH_)
- void SetMulOpCounts(GenTreePtr tree);
+ void TreeNodeInfoInitMul(GenTreePtr tree);
+ void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
#endif // defined(_TARGET_XARCH_)
#if !CPU_LOAD_STORE_ARCH
bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
bool IsBinOpInRMWStoreInd(GenTreePtr tree);
bool IsRMWMemOpRootedAtStoreInd(GenTreePtr storeIndTree, GenTreePtr* indirCandidate, GenTreePtr* indirOpSource);
- bool SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd);
#endif
void LowerStoreLoc(GenTreeLclVarCommon* tree);
- void SetIndirAddrOpCounts(GenTree* indirTree);
- void LowerGCWriteBarrier(GenTree* tree);
GenTree* LowerArrElem(GenTree* node);
void LowerRotate(GenTree* tree);
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
index 5bf23c4..9792b8a 100644
--- a/src/jit/lowerarm.cpp
+++ b/src/jit/lowerarm.cpp
@@ -23,8 +23,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
-// The ARM backend is not yet implemented, so the methods here are all NYI.
-// TODO-ARM-NYI: Lowering for ARM.
#ifdef _TARGET_ARM_
#include "jit.h"
@@ -33,6 +31,68 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Widening operations of unsigneds.
+//
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ // Try to widen the ops if they are going into a local var.
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-ARM-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
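// Editorial sketch, not part of the patch: the bit-twiddling above in effect sign-extends the
// small constant to 32 bits before the store is widened to TYP_INT. For the 1-byte case:
inline int sketchSignExtendByteConstant(int ival)
{
    if ((ival & 0x7f) != ival) // the value does not fit in a positive signed byte
    {
        ival = (int)((unsigned)ival | 0xffffff00u); // same adjustment the code above applies
    }
    return ival;
}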
+
+//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
@@ -57,7 +117,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Note that for the overflow conversions we still depend on helper calls and
// don't expect to see them here.
// i) GT_CAST(float/double, int type with overflow detection)
-
+//
void Lowering::LowerCast(GenTree* tree)
{
assert(tree->OperGet() == GT_CAST);
@@ -71,10 +131,8 @@ void Lowering::LowerCast(GenTree* tree)
var_types srcType = op1->TypeGet();
var_types tmpType = TYP_UNDEF;
- // TODO-ARM-Cleanup: Remove following NYI assertions.
if (varTypeIsFloating(srcType))
{
- NYI_ARM("Lowering for cast from float"); // Not tested yet.
noway_assert(!tree->gtOverflow());
}
@@ -104,36 +162,78 @@ void Lowering::LowerCast(GenTree* tree)
}
}
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
void Lowering::LowerRotate(GenTreePtr tree)
{
- NYI_ARM("ARM Lowering for ROL and ROR");
-}
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
-void Lowering::TreeNodeInfoInit(GenTree* stmt)
-{
- NYI("ARM TreeNodInfoInit");
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
}
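// Editorial sketch, not part of the patch: the rewrite above relies on the rotate identity
// rol(x, n) == ror(x, bitsize - n). Standalone 32-bit check:
inline unsigned sketchRol32(unsigned x, unsigned n) { n &= 31; return (x << n) | (x >> ((32 - n) & 31)); }
inline unsigned sketchRor32(unsigned x, unsigned n) { n &= 31; return (x >> n) | (x << ((32 - n) & 31)); }
// For any x and any n in [1, 31]: sketchRol32(x, n) == sketchRor32(x, 32 - n).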
-// returns true if the tree can use the read-modify-write memory instruction form
-bool Lowering::isRMWRegOper(GenTreePtr tree)
+//------------------------------------------------------------------------
+// LowerPutArgStk: Lower a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// There is currently no Lowering required for this on ARM.
+//
+void Lowering::LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
{
- return false;
}
+//------------------------------------------------------------------------
+// IsCallTargetInRange: Can a call target address be encoded in-place?
+//
+// Return Value:
+// True if the addr fits into the range.
+//
bool Lowering::IsCallTargetInRange(void* addr)
{
return comp->codeGen->validImmForBL((ssize_t)addr);
}
-// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+//------------------------------------------------------------------------
+// IsContainableImmed: Is an immediate encodable in-place?
+//
+// Return Value:
+// True if the immediate can be folded into an instruction,
+// for example because it is small enough and non-relocatable.
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
if (varTypeIsFloating(parentNode->TypeGet()))
{
- // TODO-ARM-Cleanup: not tested yet.
- NYI_ARM("ARM IsContainableImmed for floating point type");
-
- // We can contain a floating point 0.0 constant in a compare instruction
switch (parentNode->OperGet())
{
default:
@@ -146,7 +246,12 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
case GT_GE:
case GT_GT:
if (childNode->IsIntegralConst(0))
+ {
+ // TODO-ARM-Cleanup: not tested yet.
+ NYI_ARM("ARM IsContainableImmed for floating point type");
+ // We can contain a floating point 0.0 constant in a compare instruction
return true;
+ }
break;
}
}
@@ -185,13 +290,6 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
if (emitter::emitIns_valid_imm_for_alu(immVal))
return true;
break;
-
- case GT_STORE_LCL_VAR:
- // TODO-ARM-Cleanup: not tested yet
- NYI_ARM("ARM IsContainableImmed for GT_STORE_LCL_VAR");
- if (immVal == 0)
- return true;
- break;
}
}
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index cc9e226..f5bc55e 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -29,34 +29,20 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "sideeffects.h"
#include "lower.h"
-// there is not much lowering to do with storing a local but
-// we do some handling of contained immediates and widening operations of unsigneds
+//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Widening operations of unsigneds.
+
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
- GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
- CheckImmedAndMakeContained(storeLoc, op1);
-
// Try to widen the ops if they are going into a local var.
+ GenTree* op1 = storeLoc->gtGetOp1();
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
{
GenTreeIntCon* con = op1->AsIntCon();
@@ -105,1120 +91,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-/**
- * Takes care of annotating the register requirements
- * for every TreeNodeInfo struct that maps to each tree node.
- * Preconditions:
- * LSRA has been initialized and there is a TreeNodeInfo node
- * already allocated and initialized for every tree in the IR.
- * Postconditions:
- * Every TreeNodeInfo instance has the right annotations on register
- * requirements needed by LSRA to build the Interval Table (source,
- * destination and internal [temp] register counts).
- * This code is refactored originally from LSRA.
- */
-void Lowering::TreeNodeInfoInit(GenTree* tree)
-{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- unsigned kind = tree->OperKind();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- RegisterType registerType = TypeGet(tree);
-
- JITDUMP("TreeNodeInfoInit for: ");
- DISPNODE(tree);
- JITDUMP("\n");
-
- switch (tree->OperGet())
- {
- GenTree* op1;
- GenTree* op2;
-
- default:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
- break;
-
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
- info->srcCount = 1;
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
- case GT_BOX:
- noway_assert(!"box should not exist here");
- // The result of 'op1' is also the final result
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_PHYSREGDST:
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_COMMA:
- {
- GenTreePtr firstOperand;
- GenTreePtr secondOperand;
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- firstOperand = tree->gtOp.gtOp2;
- secondOperand = tree->gtOp.gtOp1;
- }
- else
- {
- firstOperand = tree->gtOp.gtOp1;
- secondOperand = tree->gtOp.gtOp2;
- }
- if (firstOperand->TypeGet() != TYP_VOID)
- {
- firstOperand->gtLsraInfo.isLocalDefUse = true;
- firstOperand->gtLsraInfo.dstCount = 0;
- }
- if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
- {
- secondOperand->gtLsraInfo.isLocalDefUse = true;
- secondOperand->gtLsraInfo.dstCount = 0;
- }
- }
-
- __fallthrough;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_CNS_DBL:
- info->srcCount = 0;
- info->dstCount = 1;
- {
- GenTreeDblCon* dblConst = tree->AsDblCon();
- double constValue = dblConst->gtDblCon.gtDconVal;
-
- if (emitter::emitIns_valid_imm_for_fmov(constValue))
- {
- // Directly encode constant to instructions.
- }
- else
- {
- // Reserve int to load constant from memory (IF_LARGELDC)
- info->internalIntCount = 1;
- }
- }
- break;
-
- case GT_QMARK:
- case GT_COLON:
- info->srcCount = 0;
- info->dstCount = 0;
- unreached();
- break;
-
- case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
- break;
-
- case GT_RETFILT:
- if (tree->TypeGet() == TYP_VOID)
- {
- info->srcCount = 0;
- info->dstCount = 0;
- }
- else
- {
- assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->dstCount = 0;
-
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
- }
- break;
-
- case GT_NOP:
- // A GT_NOP is either a passthrough (if it is void, or if it has
- // a child), but must be considered to produce a dummy value if it
- // has a type but no child
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
- {
- info->dstCount = 1;
- }
- else
- {
- info->dstCount = 0;
- }
- break;
-
- case GT_JTRUE:
- info->srcCount = 0;
- info->dstCount = 0;
- l->clearDstCount(tree->gtOp.gtOp1);
- break;
-
- case GT_JMP:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_SWITCH:
- // This should never occur since switch nodes must not be visible at this
- // point in the JIT.
- info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
- noway_assert(!"Switch must be lowered at this point");
- break;
-
- case GT_JMPTABLE:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
- case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_ASG:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ADD:
- case GT_SUB:
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // overflow operations aren't supported on float/double types.
- assert(!tree->gtOverflow());
-
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- break;
- }
-
- __fallthrough;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- info->srcCount = 2;
- info->dstCount = 1;
- // Check and make op2 contained (if it is a containable immediate)
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_RETURNTRAP:
- // this just turns into a compare of its child with an int
- // + a conditional call
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_MOD:
- case GT_UMOD:
- NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
- assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
- break;
-
- case GT_MUL:
- if (tree->gtOverflow())
- {
- // Need a register different from target reg to check for overflow.
- info->internalIntCount = 2;
- }
- __fallthrough;
-
- case GT_DIV:
- case GT_MULHI:
- case GT_UDIV:
- {
- info->srcCount = 2;
- info->dstCount = 1;
- }
- break;
-
- case GT_INTRINSIC:
- {
- // TODO-ARM64-NYI
- // Right now only Abs/Round/Sqrt are treated as math intrinsics
- noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
- (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
- (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
-
- // Both operand and its result must be of the same floating point type.
- op1 = tree->gtOp.gtOp1;
- assert(varTypeIsFloating(op1));
- assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = 1;
- info->dstCount = 1;
- }
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- TreeNodeInfoInitSIMD(tree);
- break;
-#endif // FEATURE_SIMD
-
- case GT_CAST:
- {
- // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
- // register.
- // see CodeGen::genIntToIntCast()
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Non-overflow casts to/from float/double are done using SSE2 instructions
- // and that allow the source operand to be either a reg or memop. Given the
- // fact that casts from small int to float/double are done as two-level casts,
- // the source operand is always guaranteed to be of size 4 or 8 bytes.
- var_types castToType = tree->CastToType();
- GenTreePtr castOp = tree->gtCast.CastOp();
- var_types castOpType = castOp->TypeGet();
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- castOpType = genUnsignedType(castOpType);
- }
-#ifdef DEBUG
- if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
- {
- // If converting to float/double, the operand must be 4 or 8 byte in size.
- if (varTypeIsFloating(castToType))
- {
- unsigned opSize = genTypeSize(castOpType);
- assert(opSize == 4 || opSize == 8);
- }
- }
-#endif // DEBUG
- // Some overflow checks need a temp reg
-
- CastInfo castInfo;
-
- // Get information about the cast.
- getCastDescription(tree, &castInfo);
-
- if (castInfo.requiresOverflowCheck)
- {
- var_types srcType = castOp->TypeGet();
- emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
-
- // If we cannot store the comparisons in an immediate for either
- // comparing against the max or min value, then we will need to
- // reserve a temporary register.
-
- bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
- bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
-
- if (!canStoreMaxValue || !canStoreMinValue)
- {
- info->internalIntCount = 1;
- }
- }
- }
- break;
-
- case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROR:
- {
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
- if (shiftBy->IsCnsIntOrI())
- {
- l->clearDstCount(shiftBy);
- info->srcCount--;
- }
- }
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- TreeNodeInfoInitCmp(tree);
- break;
-
- case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
- info->internalIntCount = 1;
- break;
-
- case GT_CMPXCHG:
- info->srcCount = 3;
- info->dstCount = 1;
-
- // TODO-ARM64-NYI
- NYI("CMPXCHG");
- break;
-
- case GT_LOCKADD:
- info->srcCount = 2;
- info->dstCount = 0;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
- break;
-
- case GT_ADDR:
- {
- // For a GT_ADDR, the child node should not be evaluated into a register
- GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- l->clearDstCount(child);
- info->srcCount = 0;
- info->dstCount = 1;
- }
- break;
-
- case GT_BLK:
- case GT_DYN_BLK:
- // These should all be eliminated prior to Lowering.
- assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
- break;
-
- case GT_INIT_VAL:
- // Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LCLHEAP:
- {
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
- // Here '-' means don't care.
- //
- // Size? Init Memory? # temp regs
- // 0 - 0
- // const and <=6 ptr words - 0
- // const and <PageSize No 0
- // >6 ptr words Yes hasPspSym ? 1 : 0
- // Non-const Yes hasPspSym ? 1 : 0
- // Non-const No 2
- //
- // PSPSym - If the method has PSPSym increment internalIntCount by 1.
- //
- bool hasPspSym;
-#if FEATURE_EH_FUNCLETS
- hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
-#else
- hasPspSym = false;
-#endif
-
- GenTreePtr size = tree->gtOp.gtOp1;
- if (size->IsCnsIntOrI())
- {
- MakeSrcContained(tree, size);
-
- size_t sizeVal = size->gtIntCon.gtIconVal;
-
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
- {
- // Compute the amount of memory to properly STACK_ALIGN.
-                    // Note: The GenTree node is not updated here as it is cheap to recompute the stack-aligned size.
- // This should also help in debugging as we can examine the original size specified with
- // localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
- size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
-
-                    // For small allocations we use up to 4 'stp' instructions (i.e. 64 bytes of localloc)
- //
- if (cntStackAlignedWidthItems <= 4)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
- {
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
- {
- info->internalIntCount = 0;
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
- }
- }
- else
- {
-                        // More than 4 slots and we need to zero-initialize the allocated stack space.
- // If the method has PSPSym, we need an internal register to hold regCnt
- // since targetReg allocated to GT_LCLHEAP node could be the same as one of
-                        // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
- }
- }
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- info->internalIntCount = 2;
- }
- else
- {
- // If the method has PSPSym, we need an internal register to hold regCnt
- // since targetReg allocated to GT_LCLHEAP node could be the same as one of
-                    // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
- }
- }
-
-            // If the method has PSPSym, we would need an additional register to relocate it on the stack.
- if (hasPspSym)
- {
- // Exclude const size 0
- if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
- info->internalIntCount++;
- }
- }
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- {
- GenTreeBoundsChk* node = tree->AsBoundsChk();
- // Consumes arrLen & index - has no result
- info->srcCount = 2;
- info->dstCount = 0;
-
- GenTree* intCns = nullptr;
- GenTree* other = nullptr;
- if (CheckImmedAndMakeContained(tree, node->gtIndex))
- {
- intCns = node->gtIndex;
- other = node->gtArrLen;
- }
- else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
- {
- intCns = node->gtArrLen;
- other = node->gtIndex;
- }
- else
- {
- other = node->gtIndex;
- }
- }
- break;
-
- case GT_ARR_ELEM:
- // These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
-
-            // We need one internal register when generating code for GT_ARR_INDEX; however, the
-            // register allocator may give us the same register as the one it picks for 'dst'.
-            // As a workaround we will just ask for two internal registers.
- //
- info->internalIntCount = 2;
-
- // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
- // times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
-
- case GT_ARR_OFFSET:
- // This consumes the offset, if any, the arrObj and the effective index,
- // and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
- info->internalIntCount = 1;
-
- // we don't want to generate code for this
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
- {
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
- }
- break;
-
- case GT_LEA:
- {
- GenTreeAddrMode* lea = tree->AsAddrMode();
-
- GenTree* base = lea->Base();
- GenTree* index = lea->Index();
- unsigned cns = lea->gtOffset;
-
- // This LEA is instantiating an address,
- // so we set up the srcCount and dstCount here.
- info->srcCount = 0;
- if (base != nullptr)
- {
- info->srcCount++;
- }
- if (index != nullptr)
- {
- info->srcCount++;
- }
- info->dstCount = 1;
-
- // On ARM64 we may need a single internal register
-            // (when both conditions are true we still need only a single internal register)
- if ((index != nullptr) && (cns != 0))
- {
- // ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
- }
- else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
- {
- // This offset can't be contained in the add instruction, so we need an internal register
- info->internalIntCount = 1;
- }
- }
- break;
-
- case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
- if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
- {
- LowerGCWriteBarrier(tree);
- break;
- }
- if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
- {
- // an integer zero for 'src' can be contained.
- MakeSrcContained(tree, src);
- }
-
- SetIndirAddrOpCounts(tree);
- }
- break;
-
- case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
- // null check is an indirection on an addr
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_CATCH_ARG:
- info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
- break;
-
- case GT_CLS_VAR:
- info->srcCount = 0;
- // GT_CLS_VAR, by the time we reach the backend, must always
- // be a pure use.
- // It will produce a result of the type of the
- // node, and use an internal register for the address.
-
- info->dstCount = 1;
- assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
- info->internalIntCount = 1;
- break;
- } // end switch (tree->OperGet())
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || tree->IsMultiRegCall());
-}
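The GT_LCLHEAP case above reserves internal registers based on the stack-aligned allocation size. A minimal standalone sketch of that arithmetic, assuming STACK_ALIGN is 16 (so STACK_ALIGN_SHIFT is 4) and a 4 KB page size, and ignoring the PSPSym adjustment:

    #include <cstddef>
    #include <cstdio>

    // Round 'size' up to the next multiple of 'align' (align must be a power of two).
    static size_t AlignUpTo(size_t size, size_t align)
    {
        return (size + align - 1) & ~(align - 1);
    }

    // Mirrors the constant-size branch of the GT_LCLHEAP logic above
    // (PSPSym handling omitted): returns the number of internal int registers.
    static int TempRegsForConstLclHeap(size_t sizeVal, bool initMem, size_t pageSize)
    {
        if (sizeVal == 0)
            return 0;
        sizeVal      = AlignUpTo(sizeVal, 16);   // assumed STACK_ALIGN
        size_t slots = sizeVal >> 4;             // assumed STACK_ALIGN_SHIFT
        if (slots <= 4)
            return 0;                            // handled with a few 'stp' stores
        if (!initMem)
            return (sizeVal < pageSize) ? 0 : 2; // need regCnt and a probing temp
        return 0;                                // zero-init loop; 1 more if PSPSym exists
    }

    int main()
    {
        printf("%d\n", TempRegsForConstLclHeap(100, false, 4096));  // 112 bytes -> 7 slots -> 0
        printf("%d\n", TempRegsForConstLclHeap(8192, false, 4096)); // >= page size -> 2
        return 0;
    }

For a non-constant size, the original code instead reserves 2 internal registers (or hasPspSym ? 1 : 0 when zero-initializing), as shown in the case body above.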
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
- {
- GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- // Mark var as contained if not enregistrable.
- if (!varTypeIsEnregisterableStruct(op1))
- {
- MakeSrcContained(tree, op1);
- }
- }
- else
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- info->srcCount++;
-
-        // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr is
- // computed into a register.
- if (call->IsFastTailCall())
- {
- // Fast tail call - make sure that call target is always computed in IP0
- // so that epilog sequence can generate "br xip0" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
- }
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
- info->setDstCandidates(l, RBM_FLOATRET);
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
- // First, count reg args
- bool callHasFloatRegArgs = false;
-
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
- continue;
- }
-
- var_types argType = argNode->TypeGet();
- bool argIsFloat = varTypeIsFloating(argType);
- callHasFloatRegArgs |= argIsFloat;
-
- regNumber argReg = curArgTabEntry->regNum;
- // We will setup argMask to the set of all registers that compose this argument
- regMaskTP argMask = 0;
-
- argNode = argNode->gtEffectiveVal();
-
- // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
- if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
- {
- GenTreePtr actualArgNode = argNode;
- unsigned originalSize = 0;
-
- if (argNode->gtOper == GT_FIELD_LIST)
- {
- // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
-
-                // Initialize the first register and the first regmask in our list
- regNumber targetReg = argReg;
- regMaskTP targetMask = genRegMask(targetReg);
- unsigned iterationNum = 0;
- originalSize = 0;
-
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
-
- originalSize += REGSIZE_BYTES; // 8 bytes
-
- // Record the register requirements for the GT_PUTARG_REG node
- putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // We consume one source for each item in this list
- info->srcCount++;
- iterationNum++;
-
- // Update targetReg and targetMask for the next putarg_reg (if any)
- targetReg = genRegArgNext(targetReg);
- targetMask = genRegMask(targetReg);
- }
- }
- else
- {
-#ifdef DEBUG
- compiler->gtDispTreeRange(BlockRange(), argNode);
-#endif
- noway_assert(!"Unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
- regNumber curReg = argReg;
- regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
- unsigned remainingSlots = slots;
-
- while (remainingSlots > 0)
- {
- argMask |= genRegMask(curReg);
- remainingSlots--;
-
- if (curReg == lastReg)
- break;
-
- curReg = genRegArgNext(curReg);
- }
-
- // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
- noway_assert(remainingSlots == 0);
- argNode->gtLsraInfo.internalIntCount = 0;
- }
- else // A scalar argument (not a struct)
- {
- // We consume one source
- info->srcCount++;
-
- argMask |= genRegMask(argReg);
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
- }
- }
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
-
- // Skip arguments that have been moved to the Late Arg list
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- if (arg->gtOper == GT_PUTARG_STK)
- {
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- assert(curArgTabEntry);
-
- assert(curArgTabEntry->regNum == REG_STK);
-
- TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
- }
- else
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- argInfo->dstCount = 0;
- }
- }
- args = args->gtOp.gtOp2;
- }
-
- // If it is a fast tail call, it is already preferenced to use IP0.
-    // Therefore, there is no need to set src candidates on the call target again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Arm64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-}
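For a multi-register struct argument, the loop above walks consecutive argument registers, one per 8-byte slot, ORing each into argMask and stopping at the last argument register (a Reg/Stk split is not allowed). A simplified standalone sketch of that walk, using hypothetical integer stand-ins for regNumber/regMaskTP and genRegArgNext:

    #include <cstdint>
    #include <cstdio>

    using RegMask = uint32_t; // hypothetical stand-in for regMaskTP

    // Build the mask of argument registers used by a struct that starts in register
    // 'firstReg' and occupies 'slots' 8-byte slots; mirrors the remainingSlots loop above.
    static RegMask ArgMaskForStruct(int firstReg, unsigned slots, int lastReg)
    {
        RegMask  mask           = 0;
        int      curReg         = firstReg;
        unsigned remainingSlots = slots;
        while (remainingSlots > 0)
        {
            mask |= (RegMask(1) << curReg);
            remainingSlots--;
            if (curReg == lastReg)
                break;    // struct args may not split between registers and stack
            curReg++;     // stand-in for genRegArgNext()
        }
        return mask;
    }

    int main()
    {
        // A 16-byte struct starting in the third integer argument register occupies
        // two consecutive registers: bits 2 and 3 -> 0xc.
        printf("0x%x\n", ArgMaskForStruct(2, 2, 7));
        return 0;
    }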
-
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
-//
-// Arguments:
-// argNode - a GT_PUTARG_STK node
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Set the child node(s) to be contained when we have a multireg arg
-//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
-{
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // Initialize 'argNode' as not contained, as this is both the default case
-    // and how MakeSrcContained expects to find things set up.
- //
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
-    // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST)? If so, it must be a multireg pass-by-value struct
- if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
- {
- // We will use store instructions that each write a register sized value
-
- if (putArgChild->OperGet() == GT_FIELD_LIST)
- {
- // We consume all of the items in the GT_FIELD_LIST
- argNode->gtLsraInfo.srcCount = info->numSlots;
- }
- else
- {
- // We could use a ldp/stp sequence so we need two internal registers
- argNode->gtLsraInfo.internalIntCount = 2;
-
- if (putArgChild->OperGet() == GT_OBJ)
- {
- GenTreePtr objChild = putArgChild->gtOp.gtOp1;
- if (objChild->OperGet() == GT_LCL_VAR_ADDR)
- {
- // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
- // as one contained operation
- //
- MakeSrcContained(putArgChild, objChild);
- }
- }
-
-            // We will generate all of the code for the GT_PUTARG_STK and its child node
- // as one contained operation
- //
- MakeSrcContained(argNode, putArgChild);
- }
- }
- else
- {
- // We must not have a multi-reg struct
- assert(info->numSlots == 1);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// LowerBlockStore: Set block store type
//
// Arguments:
// blkNode - The block store node of interest
@@ -1226,22 +100,17 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
// Return Value:
// None.
//
-// Notes:
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
// Sources are dest address and initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- GenTreePtr srcAddrOrFill = nullptr;
- bool isInitBlk = blkNode->OperIsInitBlkOp();
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
if (!isInitBlk)
{
@@ -1253,20 +122,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (source->gtOper == GT_IND)
{
srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
}
}
@@ -1303,41 +158,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
initVal->gtType = TYP_LONG;
}
-
- // In case we have a buffer >= 16 bytes
- // we can use SSE2 to do a 128-bit store in a single
- // instruction.
- if (size >= XMM_REGSIZE_BYTES)
- {
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- }
initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
- }
}
else
#endif // 0
{
- // The helper follows the regular ABI.
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
- blkNode->gtLsraInfo.internalIntCount = 1;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
- sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
}
}
else
@@ -1373,18 +199,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
assert(objNode->HasGCPtr());
#endif
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- blkNode->gtLsraInfo.internalIntCount = 1;
-
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
- }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
{
@@ -1395,41 +210,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
#if 0
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
// we should unroll the loop to improve CQ.
+ // For reference see the code in lowerxarch.cpp.
// TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
{
- // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
-        // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
- // our framework assemblies, so this is the main code generation scheme we'll use.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- info->internalIntCount++;
- info->addInternalCandidates(l, l->allRegs(TYP_INT));
- }
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- if (srcAddr != nullptr && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
@@ -1438,444 +224,10 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// In case we have a constant integer this means we went beyond
// CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of
// any GC-Pointers in the src struct.
-
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- // The srcAddr goes in arg1.
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
- }
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- internalIntCandidates |= RBM_ARG_2;
- internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
- if (internalIntCount != 0)
- {
- blkNode->gtLsraInfo.internalIntCount = internalIntCount;
- blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
- }
- }
- }
-}
-
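The init-block path kept above widens a one-byte fill value into a 64-bit pattern by multiplying with 0x0101010101010101, so a single register store writes the byte into all eight positions. A quick standalone check of that identity:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint8_t  fill    = 0xAB;
        uint64_t pattern = 0x0101010101010101ULL * fill;     // replicates the byte 8 times
        printf("0x%016llx\n", (unsigned long long)pattern);  // prints 0xabababababababab
        return 0;
    }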
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-// tree - The GT_SIMD node of interest
-//
-// Return Value:
-// None.
-
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
-{
- NYI("TreeNodeInfoInitSIMD");
- GenTreeSIMD* simdTree = tree->AsSIMD();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* lsra = m_lsra;
- info->dstCount = 1;
- switch (simdTree->gtSIMDIntrinsicID)
- {
- case SIMDIntrinsicInit:
- {
- // This sets all fields of a SIMD struct to the given value.
- // Mark op1 as contained if it is either zero or int constant of all 1's.
- info->srcCount = 1;
- GenTree* op1 = tree->gtOp.gtOp1;
- if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
- op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
- (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
- op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- }
- break;
-
- case SIMDIntrinsicInitN:
- info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
- // Need an internal register to stitch together all the values into a single vector in an XMM reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicInitArray:
- // We have an array and an index, which may be contained.
- info->srcCount = 2;
- CheckImmedAndMakeContained(tree, tree->gtGetOp2());
- break;
-
- case SIMDIntrinsicDiv:
- // SSE2 has no instruction support for division on integer vectors
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicAbs:
-            // This gets implemented as a bitwise-AND operation with a mask,
-            // so we should never see it here.
- unreached();
- break;
-
- case SIMDIntrinsicSqrt:
- // SSE2 has no instruction support for sqrt on integer vectors.
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicAdd:
- case SIMDIntrinsicSub:
- case SIMDIntrinsicMul:
- case SIMDIntrinsicBitwiseAnd:
- case SIMDIntrinsicBitwiseAndNot:
- case SIMDIntrinsicBitwiseOr:
- case SIMDIntrinsicBitwiseXor:
- case SIMDIntrinsicMin:
- case SIMDIntrinsicMax:
- info->srcCount = 2;
-
- // SSE2 32-bit integer multiplication requires two temp regs
- if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
- {
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicEqual:
- info->srcCount = 2;
- break;
-
- // SSE2 doesn't support < and <= directly on int vectors.
- // Instead we need to use > and >= with swapped operands.
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicLessThanOrEqual:
- info->srcCount = 2;
- noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
- break;
-
- // SIMDIntrinsicEqual is supported only on non-floating point base type vectors.
- // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
- // Instead we need to use < and <= with swapped operands.
- case SIMDIntrinsicGreaterThan:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicGreaterThanOrEqual:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
-
- // a >= b = (a==b) | (a>b)
- // To hold intermediate result of a==b and a>b we need two distinct
- // registers. We can use targetReg and one internal reg provided
- // they are distinct which is not guaranteed. Therefore, we request
- // two internal registers so that one of the internal registers has
- // to be different from targetReg.
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- // Need two SIMD registers as scratch.
-            // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
- // the need for two scratch registers.
- info->srcCount = 2;
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicDotProduct:
-            // We also need an internal register as scratch. Further, targetReg and the internal
-            // reg must be two distinct regs. This is achieved by requesting two internal registers,
-            // so that at least one of them is different from targetReg.
- //
- // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
- // the need for scratch registers.
- info->srcCount = 2;
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicGetItem:
- // This implements get_Item method. The sources are:
- // - the source SIMD struct
- // - index (which element to get)
- // The result is baseType of SIMD struct.
- info->srcCount = 2;
-
-            op2 = tree->gtGetOp2();
- // If the index is a constant, mark it as contained.
- if (CheckImmedAndMakeContained(tree, op2))
- {
- info->srcCount = 1;
- }
-
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
- // can use that in the process of extracting the element.
- // In all other cases with constant index, we need a temp xmm register to extract the
- // element if index is other than zero.
- if (!op2->IsCnsIntOrI())
- {
- (void)comp->getSIMDInitTempVarNum();
- }
- else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicCast:
- info->srcCount = 1;
- break;
-
- // These should have been transformed in terms of other intrinsics
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
-            assert(!"OpEquality/OpInEquality intrinsics should not be seen during Lowering.");
- unreached();
-
- case SIMDIntrinsicGetX:
- case SIMDIntrinsicGetY:
- case SIMDIntrinsicGetZ:
- case SIMDIntrinsicGetW:
- case SIMDIntrinsicGetOne:
- case SIMDIntrinsicGetZero:
- case SIMDIntrinsicGetLength:
- case SIMDIntrinsicGetAllOnes:
- assert(!"Get intrinsics should not be seen during Lowering.");
- unreached();
-
- default:
- noway_assert(!"Unimplemented SIMD node type.");
- unreached();
- }
-}
-#endif // FEATURE_SIMD
-
-void Lowering::LowerGCWriteBarrier(GenTree* tree)
-{
- GenTreePtr dst = tree;
- GenTreePtr addr = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtOp.gtOp2;
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- short leaSrcCount = 0;
- if (lea->Base() != nullptr)
- {
- leaSrcCount++;
- }
- if (lea->Index() != nullptr)
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
-#if NOGC_WRITE_BARRIERS
- // For the NOGC JIT Helper calls
- //
- // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
- // the 'src' goes into x15 (REG_WRITE_BARRIER)
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
-#else
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
-#endif // NOGC_WRITE_BARRIERS
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
-
-//-----------------------------------------------------------------------------------------
-// Specify register requirements for address expression of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
-//
-void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
-{
- assert(indirTree->OperIsIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned cns = 0;
- unsigned mul;
- bool rev;
- bool modifiedSources = false;
-
- if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- GenTreeAddrMode* lea = addr->AsAddrMode();
- base = lea->Base();
- index = lea->Index();
- cns = lea->gtOffset;
-
- m_lsra->clearOperandCounts(addr);
- // The srcCount is decremented because addr is now "contained",
- // then we account for the base and index below, if they are non-null.
- info->srcCount--;
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- GenTreePtr arrLength = nullptr;
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = (base == nullptr);
- bool foundIndex = (index == nullptr);
- GenTreePtr nextChild = nullptr;
- for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTreePtr op1 = child->gtOp.gtOp1;
- GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
}
- assert(foundBase && foundIndex);
- info->srcCount--; // it gets incremented below.
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
-        // We also consume the target register into which the address is
-        // computed.
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
}
- else
- {
- // it is nothing but a plain indir
- info->srcCount--; // base gets added in below
- base = addr;
- }
-
- if (base != nullptr)
- {
- info->srcCount++;
- }
-
- if (index != nullptr && !modifiedSources)
- {
- info->srcCount++;
- }
-
- // On ARM64 we may need a single internal register
-    // (when both conditions are true we still need only a single internal register)
- if ((index != nullptr) && (cns != 0))
- {
- // ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
- }
- else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
- {
- // This offset can't be contained in the ldr/str instruction, so we need an internal register
- info->internalIntCount = 1;
- }
-}
-
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
}
/* Lower GT_CAST(srcType, DstType) nodes.
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 589cef4..f89a3df 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -42,61 +42,11 @@ void Lowering::LowerRotate(GenTreePtr tree)
//
// Notes:
// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Requesting an internal register for SIMD12 stores.
-// - Handling of contained immediates and widening operations of unsigneds.
+// - Widening operations of unsigneds.
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
- {
- if (op1->IsCnsIntOrI())
- {
- // InitBlk
- MakeSrcContained(storeLoc, op1);
- }
- else if ((storeLoc->TypeGet() == TYP_SIMD12) && (storeLoc->OperGet() == GT_STORE_LCL_FLD))
- {
- // Need an additional register to extract upper 4 bytes of Vector3.
- info->internalFloatCount = 1;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
-
- // In this case don't mark the operand as contained as we want it to
- // be evaluated into an xmm register
- }
- return;
- }
-#endif // FEATURE_SIMD
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
- {
- MakeSrcContained(storeLoc, op1);
- }
// Try to widen the ops if they are going into a local var.
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
@@ -148,1490 +98,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-/**
- * Takes care of annotating the register requirements
- * for every TreeNodeInfo struct that maps to each tree node.
- * Preconditions:
- * LSRA Has been initialized and there is a TreeNodeInfo node
- * already allocated and initialized for every tree in the IR.
- * Postconditions:
- * Every TreeNodeInfo instance has the right annotations on register
- * requirements needed by LSRA to build the Interval Table (source,
- * destination and internal [temp] register counts).
- * This code is refactored originally from LSRA.
- */
-void Lowering::TreeNodeInfoInit(GenTree* tree)
-{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- switch (tree->OperGet())
- {
- GenTree* op1;
- GenTree* op2;
-
- default:
- TreeNodeInfoInitSimple(tree);
- break;
-
- case GT_LCL_FLD:
- case GT_LCL_VAR:
- info->srcCount = 0;
- info->dstCount = 1;
-
-#ifdef FEATURE_SIMD
- // Need an additional register to read upper 4 bytes of Vector3.
- if (tree->TypeGet() == TYP_SIMD12)
- {
- // We need an internal register different from targetReg in which 'tree' produces its result
- // because both targetReg and internal reg will be in use at the same time.
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
- }
-#endif
- break;
-
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
-#ifdef _TARGET_X86_
- if (tree->gtGetOp1()->OperGet() == GT_LONG)
- {
- info->srcCount = 2;
- }
- else
-#endif // _TARGET_X86_
- {
- info->srcCount = 1;
- }
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
- case GT_BOX:
- noway_assert(!"box should not exist here");
- // The result of 'op1' is also the final result
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_PHYSREGDST:
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_COMMA:
- {
- GenTreePtr firstOperand;
- GenTreePtr secondOperand;
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- firstOperand = tree->gtOp.gtOp2;
- secondOperand = tree->gtOp.gtOp1;
- }
- else
- {
- firstOperand = tree->gtOp.gtOp1;
- secondOperand = tree->gtOp.gtOp2;
- }
- if (firstOperand->TypeGet() != TYP_VOID)
- {
- firstOperand->gtLsraInfo.isLocalDefUse = true;
- firstOperand->gtLsraInfo.dstCount = 0;
- }
- if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
- {
- secondOperand->gtLsraInfo.isLocalDefUse = true;
- secondOperand->gtLsraInfo.dstCount = 0;
- }
- }
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_CNS_DBL:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
-#if !defined(_TARGET_64BIT_)
-
- case GT_LONG:
- if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
- {
- // An unused GT_LONG node needs to consume its sources.
- info->srcCount = 2;
- }
- else
- {
- // Passthrough
- info->srcCount = 0;
- }
-
- info->dstCount = 0;
- break;
-
-#endif // !defined(_TARGET_64BIT_)
-
- case GT_QMARK:
- case GT_COLON:
- info->srcCount = 0;
- info->dstCount = 0;
- unreached();
- break;
-
- case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
- break;
-
- case GT_RETFILT:
- if (tree->TypeGet() == TYP_VOID)
- {
- info->srcCount = 0;
- info->dstCount = 0;
- }
- else
- {
- assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->dstCount = 0;
-
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
- }
- break;
-
-        // A GT_NOP is a passthrough if it is void or if it has a child, but it must
-        // be considered to produce a dummy value if it has a type and no child.
- case GT_NOP:
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
- {
- info->dstCount = 1;
- }
- else
- {
- info->dstCount = 0;
- }
- break;
-
- case GT_JTRUE:
- {
- info->srcCount = 0;
- info->dstCount = 0;
-
- GenTree* cmp = tree->gtGetOp1();
- l->clearDstCount(cmp);
-
-#ifdef FEATURE_SIMD
- // Say we have the following IR
- // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
- // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
- // GT_JTRUE(integerCompareResult)
- //
-            // In this case we don't need to generate code for GT_EQ/GT_NE, since the SIMD (In)Equality
-            // intrinsic will set or clear the Zero flag.
-
- genTreeOps cmpOper = cmp->OperGet();
- if (cmpOper == GT_EQ || cmpOper == GT_NE)
- {
- GenTree* cmpOp1 = cmp->gtGetOp1();
- GenTree* cmpOp2 = cmp->gtGetOp2();
-
- if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
- {
- // clear dstCount on SIMD node to indicate that
- // result doesn't need to be materialized into a register.
- l->clearOperandCounts(cmp);
- l->clearDstCount(cmpOp1);
- l->clearOperandCounts(cmpOp2);
-
- // Codegen of SIMD (in)Equality uses target integer reg
- // only for setting flags. Target reg is not needed on AVX
- // when comparing against Vector Zero. In all other cases
- // we need to reserve an int type internal register, since we
- // have cleared dstCount.
- if (compiler->canUseAVX() && cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
- {
-                    // We don't need an internal register, since we use vptest
- // for setting flags.
- }
- else
- {
- ++(cmpOp1->gtLsraInfo.internalIntCount);
- regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
- internalCandidates |= l->allRegs(TYP_INT);
- cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
- }
-
- // We would have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- }
- }
-#endif // FEATURE_SIMD
- }
- break;
-
- case GT_JCC:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_JMP:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_SWITCH:
- // This should never occur since switch nodes must not be visible at this
- // point in the JIT.
- info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
- noway_assert(!"Switch must be lowered at this point");
- break;
-
- case GT_JMPTABLE:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
- case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_ASG:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
-#endif
- case GT_ADD:
- case GT_SUB:
-            // SSE2 arithmetic instructions don't support the form "op mem, xmm".
- // Rather they only support "op xmm, mem/xmm" form.
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // overflow operations aren't supported on float/double types.
- assert(!tree->gtOverflow());
-
- op1 = tree->gtGetOp1();
- op2 = tree->gtGetOp2();
-
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else if (tree->OperIsCommutative() &&
- (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
- {
-                // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
- // as long as it is safe so that the following efficient code sequence is generated:
- // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
- // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
- //
- // Instead of
- // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
- // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
- MakeSrcContained(tree, op1);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
- }
- break;
- }
-
- __fallthrough;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- TreeNodeInfoInitLogicalOp(tree);
- break;
-
- case GT_RETURNTRAP:
- // this just turns into a compare of its child with an int
- // + a conditional call
- info->srcCount = 1;
- info->dstCount = 0;
- if (tree->gtOp.gtOp1->isIndir())
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- }
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
- break;
-
- case GT_MOD:
- case GT_DIV:
- case GT_UMOD:
- case GT_UDIV:
- TreeNodeInfoInitModDiv(tree);
- break;
-
- case GT_MUL:
- case GT_MULHI:
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- case GT_MUL_LONG:
-#endif
- SetMulOpCounts(tree);
- break;
-
- case GT_INTRINSIC:
- TreeNodeInfoInitIntrinsic(tree);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- TreeNodeInfoInitSIMD(tree);
- break;
-#endif // FEATURE_SIMD
-
- case GT_CAST:
- TreeNodeInfoInitCast(tree);
- break;
-
- case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
-
- // TODO-XArch-CQ:
- // SSE instruction set doesn't have an instruction to negate a number.
- // The recommended way is to xor the float/double number with a bitmask.
- // The only way to xor is using xorps or xorpd both of which operate on
- // 128-bit operands. To hold the bit-mask we would need another xmm
- // register or a 16-byte aligned 128-bit data constant. Right now emitter
- // lacks the support for emitting such constants or instruction with mem
- // addressing mode referring to a 128-bit operand. For now we use an
- // internal xmm register to load 32/64-bit bitmask from data section.
- // Note that by trading additional data section memory (128-bit) we can
- // save on the need for an internal register and also a memory-to-reg
- // move.
- //
- // Note: another option to avoid internal register requirement is by
- // lowering as GT_SUB(0, src). This will generate code different from
- // Jit64 and could possibly result in compat issues (?).
- if (varTypeIsFloating(tree))
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- else
- {
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
- }
- break;
-
- case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
-#ifdef _TARGET_X86_
- case GT_LSH_HI:
- case GT_RSH_LO:
-#endif
- TreeNodeInfoInitShiftRotate(tree);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- TreeNodeInfoInitCmp(tree);
- break;
-
- case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
- info->internalIntCount = 1;
- break;
-
- case GT_CMPXCHG:
- info->srcCount = 3;
- info->dstCount = 1;
-
- // comparand is preferenced to RAX.
- // Remaining two operands can be in any reg other than RAX.
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
- break;
-
- case GT_LOCKADD:
- info->srcCount = 2;
- info->dstCount = 0;
-
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
- break;
-
- case GT_ADDR:
- {
- // For a GT_ADDR, the child node should not be evaluated into a register
- GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- l->clearDstCount(child);
- info->srcCount = 0;
- info->dstCount = 1;
- }
- break;
-
-#if !defined(FEATURE_PUT_STRUCT_ARG_STK)
- case GT_OBJ:
-#endif
- case GT_BLK:
- case GT_DYN_BLK:
- // These should all be eliminated prior to Lowering.
- assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
- break;
-#endif // FEATURE_PUT_STRUCT_ARG_STK
-
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
- break;
-
- case GT_INIT_VAL:
- // Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree);
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- {
- GenTreeBoundsChk* node = tree->AsBoundsChk();
- // Consumes arrLen & index - has no result
- info->srcCount = 2;
- info->dstCount = 0;
-
- GenTreePtr other;
- if (CheckImmedAndMakeContained(tree, node->gtIndex))
- {
- other = node->gtArrLen;
- }
- else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
- {
- other = node->gtIndex;
- }
- else if (node->gtIndex->isMemoryOp())
- {
- other = node->gtIndex;
- }
- else
- {
- other = node->gtArrLen;
- }
-
- if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
- {
- if (other->isMemoryOp())
- {
- MakeSrcContained(tree, other);
- }
- else
- {
- // We can mark 'other' as reg optional, since it is not contained.
- SetRegOptional(other);
- }
- }
- }
- break;
-
- case GT_ARR_ELEM:
- // These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
- // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
- // times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
-
- case GT_ARR_OFFSET:
- // This consumes the offset, if any, the arrObj and the effective index,
- // and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
-
- // we don't want to generate code for this
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
- {
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
- }
- else
- {
- // Here we simply need an internal register, which must be different
- // from any of the operand's registers, but may be the same as targetReg.
- info->internalIntCount = 1;
- }
- break;
-
- case GT_LEA:
- // The LEA usually passes its operands through to the GT_IND, in which case we'll
- // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
- // so we set them here.
- info->srcCount = 0;
- if (tree->AsAddrMode()->HasBase())
- {
- info->srcCount++;
- }
- if (tree->AsAddrMode()->HasIndex())
- {
- info->srcCount++;
- }
- info->dstCount = 1;
- break;
-
- case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
- if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
- {
- LowerGCWriteBarrier(tree);
- break;
- }
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(tree, src) &&
- (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
- {
- MakeSrcContained(tree, src);
- }
- else if (!varTypeIsFloating(tree))
- {
- // Perform recognition of trees with the following structure:
- // StoreInd(addr, BinOp(expr, GT_IND(addr)))
- // to be able to fold this into an instruction of the form
- // BINOP [addr], register
- // where register is the actual place where 'expr' is computed.
- //
- // SSE2 doesn't support RMW form of instructions.
- if (SetStoreIndOpCountsIfRMWMemOp(tree))
- {
- break;
- }
- }
-
- SetIndirAddrOpCounts(tree);
- }
- break;
-
- case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
- break;
-
- case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_CATCH_ARG:
- info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-#endif
-
- case GT_CLS_VAR:
- // These nodes are eliminated by rationalizer.
- JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet()));
- unreached();
- break;
- } // end switch (tree->OperGet())
-
- // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
- // Even then we would like to set isTgtPref on Op1.
- if (tree->OperIsBinary() && info->srcCount >= 1)
- {
- if (isRMWRegOper(tree))
- {
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
-
- // Commutative opers like add/mul/and/or/xor could reverse the order of
- // operands if it is safe to do so. In such a case we would like op2 to be
- // target preferenced instead of op1.
- if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
- {
- op1 = op2;
- op2 = tree->gtOp.gtOp1;
- }
-
- // If we have a read-modify-write operation, we want to preference op1 to the target.
- // If op1 is contained, we don't want to preference it, but it won't
- // show up as a source in that case, so it will be ignored.
- op1->gtLsraInfo.isTgtPref = true;
-
- // Is this a non-commutative operator, or is op2 a contained memory op?
- // (Note that we can't call IsContained() at this point because it uses exactly the
- // same information we're currently computing.)
- // In either case, we need to make op2 remain live until the op is complete, by marking
- // the source(s) associated with op2 as "delayFree".
- // Note that if op2 of a binary RMW operator is a memory op, even if the operator
- // is commutative, codegen cannot reverse them.
- // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
- // more work to be done to correctly reverse the operands if they involve memory
- // operands. Also, we may need to handle more cases than GT_IND, especially once
- // we've modified the register allocator to not require all nodes to be assigned
- // a register (e.g. a spilled lclVar can often be referenced directly from memory).
- // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
-
- GenTree* delayUseSrc = nullptr;
- // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
- // to special case them.
- if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
- {
- delayUseSrc = op1;
- }
- else if ((op2 != nullptr) &&
- (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
- {
- delayUseSrc = op2;
- }
- if (delayUseSrc != nullptr)
- {
- // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
- // on the base & index, if any.
- // Otherwise, we set it on delayUseSrc itself.
- if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
- {
- GenTree* base = delayUseSrc->AsIndir()->Base();
- GenTree* index = delayUseSrc->AsIndir()->Index();
- if (base != nullptr)
- {
- base->gtLsraInfo.isDelayFree = true;
- }
- if (index != nullptr)
- {
- index->gtLsraInfo.isDelayFree = true;
- }
- }
- else
- {
- delayUseSrc->gtLsraInfo.isDelayFree = true;
- }
- info->hasDelayFreeSrc = true;
- }
- }
- }
-
- TreeNodeInfoInitCheckByteable(tree);
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
-}
-
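The GT_NEG comment above describes negating a float/double by XORing its sign bit, which is what xorps/xorpd does against a bit-mask constant. A minimal standalone sketch of that bit trick (plain C++, illustrative only, not part of this change):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Flip only the sign bit of a double, the scalar equivalent of
// xorpd xmm, [mask] with a 0x8000000000000000 mask per lane.
static double NegateViaSignBitXor(double d)
{
    uint64_t bits;
    std::memcpy(&bits, &d, sizeof(bits)); // view the double as raw bits
    bits ^= 0x8000000000000000ull;        // toggle bit 63 (the sign bit)
    double result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}

int main()
{
    std::printf("%f %f\n", NegateViaSignBitXor(3.5), NegateViaSignBitXor(-0.25));
    // prints: -3.500000 0.250000
    return 0;
}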
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
-// required, and set the tree node info accordingly.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
-{
-#ifdef _TARGET_X86_
- LinearScan* l = m_lsra;
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
- // if the tree node is a byte type.
- //
- // Though this looks conservative in theory, in practice we could not think of a case where
- // the logic below leads to an overly conservative register specification. If we ever find
- // such a case, this logic will need to be fine-tuned for it.
-
- if (ExcludeNonByteableRegisters(tree))
- {
- regMaskTP regMask;
- if (info->dstCount > 0)
- {
- regMask = info->getDstCandidates(l);
- assert(regMask != RBM_NONE);
- info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsSimple() && (info->srcCount > 0))
- {
- // No need to set src candidates on a contained child operand.
- GenTree* op = tree->gtOp.gtOp1;
- assert(op != nullptr);
- bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
- {
- op = tree->gtOp.gtOp2;
- containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
- }
- }
-#endif //_TARGET_X86_
-}
-
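TreeNodeInfoInitCheckByteable above narrows register candidates by clearing RBM_NON_BYTE_REGS from a mask. A tiny sketch of that mask arithmetic with made-up mask values (the real RBM_* constants are defined elsewhere in the JIT, not here):

#include <cstdint>
#include <cstdio>

typedef uint32_t regMaskSketch;

// Hypothetical one-bit-per-register masks; only EAX/ECX/EDX/EBX have
// byte-sized forms on x86, so the other four are "non-byteable".
const regMaskSketch EAX = 0x01, ECX = 0x02, EDX = 0x04, EBX = 0x08;
const regMaskSketch ESP = 0x10, EBP = 0x20, ESI = 0x40, EDI = 0x80;
const regMaskSketch ALL_INT_REGS  = 0xFF;
const regMaskSketch NON_BYTE_REGS = ESP | EBP | ESI | EDI;

int main()
{
    regMaskSketch candidates = ALL_INT_REGS;
    candidates &= ~NON_BYTE_REGS; // same shape as "regMask & ~RBM_NON_BYTE_REGS"
    std::printf("byteable candidates: 0x%02X\n", (unsigned)candidates); // 0x0F
    return 0;
}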
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
-// without special handling based on the tree node type.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- unsigned kind = tree->OperKind();
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
-#if !defined(_TARGET_64BIT_)
- if (tree->TypeGet() == TYP_LONG)
- {
- GenTree* op1 = tree->gtGetOp1();
- noway_assert(op1->OperGet() == GT_LONG);
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() == GT_LCL_VAR)
- {
- GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- // Mark var as contained if not enregistrable.
- if (!varTypeIsEnregisterableStruct(op1))
- {
- MakeSrcContained(tree, op1);
- }
- }
- else
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
-#if defined(_TARGET_64BIT_)
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
-#endif // defined(_TARGET_64BIT_)
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- // For shift operations, we need that the number
- // of bits moved gets stored in CL in case
- // the number of bits to shift is not a constant.
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
-
-#ifdef _TARGET_X86_
- // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
- {
- assert(source->OperGet() == GT_LONG);
-
- info->srcCount++;
-
- if (tree->OperGet() == GT_LSH_HI)
- {
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
- }
- else
- {
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
- }
-
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
- }
-#endif
-
- // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
- // We will allow whatever can be encoded - hope you know what you are doing.
- if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
- (shiftBy->gtIntConCommon.IconValue() < 0))
- {
- source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
- info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- }
- else
- {
- MakeSrcContained(tree, shiftBy);
-
- // Note that Rotate Left/Right instructions don't set ZF and SF flags.
- //
- // If the operand being shifted is 32 bits, the hardware masks the shift count
- // to its lower five bits (i.e. [0..31]). Similarly, for 64-bit operands the
- // shift count is narrowed to [0..63]. If the resulting shift count is zero,
- // the shift operation won't modify the flags.
- //
- // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
- // if the shift count is known to be non-zero and in the range depending on the
- // operand size.
- }
-}
-
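The comment in TreeNodeInfoInitShiftRotate above relies on the hardware masking the shift count (5 bits for 32-bit operands, 6 bits for 64-bit). A standalone sketch that emulates that masking (plain C++, not JIT code):

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t v32   = 1;
    uint64_t v64   = 1;
    unsigned count = 33; // deliberately larger than 31

    // Emulate what shl does in hardware for each operand size: the count is
    // reduced modulo 32 (or 64) before shifting, so 33 acts like 1 for 32-bit.
    uint32_t r32 = v32 << (count & 31); // -> 0x2
    uint64_t r64 = v64 << (count & 63); // -> 0x200000000

    std::printf("32-bit: 0x%X  64-bit: 0x%llX\n", r32, (unsigned long long)r64);
    return 0;
}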
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
-
-#ifdef _TARGET_X86_
- // Fast tail calls aren't currently supported on x86, but if they ever are, the code
- // below that handles indirect VSD calls will need to be fixed.
- assert(!call->IsFastTailCall() || !call->IsVirtualStub());
-#endif // _TARGET_X86_
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- // call can take a Rm op on x64
- info->srcCount++;
-
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (!call->IsFastTailCall())
- {
-#ifdef _TARGET_X86_
- // On x86, we need to generate a very specific pattern for indirect VSD calls:
- //
- // 3-byte nop
- // call dword ptr [eax]
- //
- // Where EAX is also used as an argument to the stub dispatch helper. Make
- // sure that the call target address is computed into EAX in this case.
- if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
- {
- assert(ctrlExpr->isIndir());
-
- ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
- MakeSrcContained(call, ctrlExpr);
- }
- else
-#endif // _TARGET_X86_
- if (ctrlExpr->isIndir())
- {
- MakeSrcContained(call, ctrlExpr);
- }
- }
- else
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
- }
-
- // If this is a varargs call, we will clear the internal candidates in case we need
- // to reserve some integer registers for copying float args.
- // We have to do this because otherwise the default candidates are allRegs, and adding
- // the individual specific registers will have no effect.
- if (call->IsVarargs())
- {
- info->setInternalCandidates(l, RBM_NONE);
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
- {
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
- // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
- // correct argument registers.
- info->setDstCandidates(l, RBM_PINVOKE_TCB);
- }
- else
-#endif // _TARGET_X86_
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
-#ifdef _TARGET_X86_
- // The return value will be on the X87 stack, and we will need to move it.
- info->setDstCandidates(l, l->allRegs(registerType));
-#else // !_TARGET_X86_
- info->setDstCandidates(l, RBM_FLOATRET);
-#endif // !_TARGET_X86_
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // number of args to a call =
- // callRegArgs + (callargs - placeholders, setup, etc)
- // there is an explicit thisPtr but it is redundant
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
-#if FEATURE_VARARG
- bool callHasFloatRegArgs = false;
-#endif // FEATURE_VARARG
-
- // First, count reg args
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- DISPNODE(argNode);
- assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- // If the node is TYP_STRUCT and it is put on stack with
- // putarg_stk operation, we consume and produce no registers.
- // In this case the embedded Obj node should not produce
- // registers either, since it is contained.
- // Note that if it is a SIMD type the argument will be in a register.
- if (argNode->TypeGet() == TYP_STRUCT)
- {
- assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
- argNode->gtLsraInfo.srcCount = 0;
- }
-#endif // FEATURE_PUT_STRUCT_ARG_STK
- continue;
- }
-
- regNumber argReg = REG_NA;
- regMaskTP argMask = RBM_NONE;
- short regCount = 0;
- bool isOnStack = true;
- if (curArgTabEntry->regNum != REG_STK)
- {
- isOnStack = false;
- var_types argType = argNode->TypeGet();
-
-#if FEATURE_VARARG
- callHasFloatRegArgs |= varTypeIsFloating(argType);
-#endif // FEATURE_VARARG
-
- argReg = curArgTabEntry->regNum;
- regCount = 1;
-
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
-
- argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
- }
-
- // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
- // Use the curArgTabEntry's isStruct to get whether the param is a struct.
- if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
- {
- unsigned originalSize = 0;
- LclVarDsc* varDsc = nullptr;
- if (argNode->gtOper == GT_LCL_VAR)
- {
- varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- }
- else if (argNode->gtOper == GT_MKREFANY)
- {
- originalSize = 2 * TARGET_POINTER_SIZE;
- }
- else if (argNode->gtOper == GT_OBJ)
- {
- noway_assert(!"GT_OBJ not supported for amd64");
- }
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- else if (argNode->gtOper == GT_PUTARG_REG)
- {
- originalSize = genTypeSize(argNode->gtType);
- }
- else if (argNode->gtOper == GT_FIELD_LIST)
- {
- originalSize = 0;
-
- // There could be up to 2 PUTARG_REGs in the list
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
- unsigned iterationNum = 0;
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
-
- if (iterationNum == 0)
- {
- varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- assert(originalSize != 0);
- }
- else
- {
- // Need an extra source for every node but the first in the list.
- info->srcCount++;
-
- // Get the mask for the second putarg_reg
- argMask = genRegMask(curArgTabEntry->otherRegNum);
- }
-
- putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
- iterationNum++;
- }
-
- assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- else
- {
- noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- unsigned remainingSlots = slots;
-
- if (!isOnStack)
- {
- remainingSlots = slots - 1;
-
- regNumber reg = (regNumber)(argReg + 1);
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
- {
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
- }
- }
-
- short internalIntCount = 0;
- if (remainingSlots > 0)
- {
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This TYP_STRUCT argument is also passed in the outgoing argument area
- // We need a register to address the TYP_STRUCT
- internalIntCount = 1;
-#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // And we may need 2
- internalIntCount = 2;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- argNode->gtLsraInfo.internalIntCount = internalIntCount;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- else
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(l, genRegMask(targetReg));
- }
-#endif // FEATURE_VARARG
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
-#if !defined(_TARGET_64BIT_)
- if (arg->TypeGet() == TYP_LONG)
- {
- assert(arg->OperGet() == GT_LONG);
- GenTreePtr loArg = arg->gtGetOp1();
- GenTreePtr hiArg = arg->gtGetOp2();
- assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK));
- assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1));
- loArg->gtLsraInfo.isLocalDefUse = true;
- hiArg->gtLsraInfo.isLocalDefUse = true;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- // If the child of GT_PUTARG_STK is a constant, we don't need a register to
- // move it to memory (stack location).
- //
- // On AMD64, we don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it. E.g.:
- // xor rdx, rdx
- // mov gword ptr [rsp+28H], rdx
- // is 2 bytes smaller than:
- // mov gword ptr [rsp+28H], 0
- //
- // On x86, we push stack arguments; we don't use 'mov'. So:
- // push 0
- // is 1 byte smaller than:
- // xor rdx, rdx
- // push rdx
-
- argInfo->dstCount = 0;
- if (arg->gtOper == GT_PUTARG_STK)
- {
- GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1)
-#if defined(_TARGET_AMD64_)
- && !op1->IsIntegralConst(0)
-#endif // _TARGET_AMD64_
- )
- {
- MakeSrcContained(arg, op1);
- }
- }
- }
- }
- args = args->gtOp.gtOp2;
- }
-
-#if FEATURE_VARARG
- // If it is a fast tail call, it is already preferenced to use RAX.
- // Therefore, no need set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Amd64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-#endif // FEATURE_VARARG
-}
-
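TreeNodeInfoInitCall above sizes struct arguments in register-sized slots via roundUp(originalSize, TARGET_POINTER_SIZE) / REGSIZE_BYTES; slots beyond the available argument registers go to the outgoing argument area. A sketch of just the slot math, with a hypothetical RoundUp helper and assumed AMD64-style constants:

#include <cstdio>

const unsigned TARGET_POINTER_SIZE = 8; // assumed AMD64 values
const unsigned REGSIZE_BYTES       = 8;

// Round size up to the next multiple of align (align must be a power of two).
static unsigned RoundUp(unsigned size, unsigned align)
{
    return (size + align - 1) & ~(align - 1);
}

int main()
{
    unsigned originalSize = 20; // e.g. a 20-byte struct
    unsigned slots        = RoundUp(originalSize, TARGET_POINTER_SIZE) / REGSIZE_BYTES;
    std::printf("slots = %u\n", slots); // 24 / 8 = 3
    return 0;
}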
//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// LowerBlockStore: Set block store type
//
// Arguments:
// blkNode - The block store node of interest
@@ -1639,25 +107,15 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- // Sources are dest address, initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP sourceRegMask = RBM_NONE;
- regMaskTP blkSizeRegMask = RBM_NONE;
if (!isInitBlk)
{
// CopyObj or CopyBlk
@@ -1668,20 +126,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (source->gtOper == GT_IND)
{
srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
}
}
@@ -1735,58 +179,18 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
#endif // !_TARGET_AMD64_
- // In case we have a buffer >= 16 bytes
- // we can use SSE2 to do a 128-bit store in a single
- // instruction.
- if (size >= XMM_REGSIZE_BYTES)
- {
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- if ((fill == 0) && ((size & 0xf) == 0))
- {
- MakeSrcContained(blkNode, source);
- }
- }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
-
-#ifdef _TARGET_X86_
- if ((size & 1) != 0)
- {
- // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
- // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
- // when unrolling, so only allow byteable registers as the source value. (We could
- // consider just using BlkOpKindRepInstr instead.)
- sourceRegMask = RBM_BYTE_REGS;
- }
-#endif // _TARGET_X86_
}
else
{
- // rep stos has the following register requirements:
- // a) The memory address to be in RDI.
- // b) The fill value has to be in RAX.
- // c) The buffer size will go in RCX.
- dstAddrRegMask = RBM_RDI;
- srcAddrOrFill = initVal;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
else
{
#ifdef _TARGET_AMD64_
- // The helper follows the regular AMD64 ABI.
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
#else // !_TARGET_AMD64_
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
#endif // !_TARGET_AMD64_
}
@@ -1870,19 +274,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (IsRepMovsProfitable)
{
// We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
else
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
-
- dstAddrRegMask = RBM_RDI;
-
- // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
- // sources.
- sourceRegMask = RBM_RSI;
}
else
{
@@ -1903,119 +300,31 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- blkNode->gtLsraInfo.internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
- }
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- if (srcAddrOrFill != nullptr && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
{
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
#ifdef _TARGET_AMD64_
else
{
- // In case we have a constant integer this means we went beyond
- // CPBLK_MOVS_LIMIT bytes of size, still we should never have the case of
- // any GC-Pointers in the src struct.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
#elif defined(_TARGET_X86_)
else
{
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
#endif // _TARGET_X86_
assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
}
- if (dstAddrRegMask != RBM_NONE)
- {
- dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
- }
- if (sourceRegMask != RBM_NONE)
- {
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
- }
- else
- {
- // This is a local source; we'll use a temp register for its address.
- blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- }
- if (blkSizeRegMask != RBM_NONE)
- {
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
- }
- }
}
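For the InitBlk unroll path above, the byte fill value is replicated across a word with initVal = 0x01010101 * fill; the 64-bit analogue shown below for a wider store is an assumption for illustration, not quoted from this change. A standalone sketch:

#include <cstdint>
#include <cstdio>

int main()
{
    uint8_t fill = 0xAB;

    // Multiplying by a pattern of 0x01 bytes copies 'fill' into every byte lane.
    uint32_t pattern32 = 0x01010101u * fill;             // 0xABABABAB
    uint64_t pattern64 = 0x0101010101010101ull * fill;   // 0xABABABABABABABAB

    std::printf("0x%08X 0x%016llX\n", pattern32, (unsigned long long)pattern64);
    return 0;
}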
#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+// LowerPutArgStk: Lower a GT_PUTARG_STK.
//
// Arguments:
// tree - The node of interest
@@ -2023,11 +332,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
{
- TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
- LinearScan* l = m_lsra;
-
#ifdef _TARGET_X86_
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
@@ -2070,9 +376,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
fieldCount++;
}
- info->srcCount = fieldCount;
- info->dstCount = 0;
-
// In theory, the upper bound for the size of a field list is 8: these constructs only appear when passing the
// collection of lclVars that represent the fields of a promoted struct lclVar, and we do not promote struct
// lclVars with more than 4 fields. If each of these lclVars is of type long, decomposition will split the
@@ -2103,9 +406,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
putArgStk->gtOp1 = fieldList;
}
- // Now that the fields have been sorted, initialize the LSRA info.
+ // Now that the fields have been sorted, determine the kind of code we will generate.
bool allFieldsAreSlots = true;
- bool needsByteTemp = false;
unsigned prevOffset = putArgStk->getArgSize();
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
@@ -2114,56 +416,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // For x86 we must mark all integral fields as contained or reg-optional, and handle them
- // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
- // registers to be consumed atomically by the call.
- if (varTypeIsIntegralOrI(fieldNode))
- {
- if (fieldNode->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
- if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
- {
- SetRegOptional(fieldNode);
- }
- else
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- }
- else if (fieldNode->IsIntCnsFitsInI32())
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- else
- {
- // For the case where we cannot directly push the value, if we run out of registers,
- // it would be better to defer computation until we are pushing the arguments rather
- // than spilling, but this situation is not all that common, as most cases of promoted
- // structs do not have a large number of fields, and of those most are lclVars or
- // copy-propagated constants.
- SetRegOptional(fieldNode);
- }
- }
- else
- {
- assert(varTypeIsFloating(fieldNode));
- }
-
// We can treat as a slot any field that is stored at a slot boundary, where the previous
// field is not in the same slot. (Note that we store the fields in reverse order.)
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
if (!fieldIsSlot)
{
allFieldsAreSlots = false;
- if (varTypeIsByte(fieldType))
- {
- // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
- // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
- // need a byte-addressable register for the store. We will enforce this requirement on an internal
- // register, which we can use to copy multiple byte values.
- needsByteTemp = true;
- }
}
if (varTypeIsGC(fieldType))
@@ -2187,35 +445,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
else
{
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
- // If any of the fields cannot be stored with an actual push, we may need a temporary
- // register to load the value before storing it to the stack location.
- info->internalIntCount = 1;
- regMaskTP regMask = l->allRegs(TYP_INT);
- if (needsByteTemp)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
- info->setInternalCandidates(l, regMask);
}
return;
}
#endif // _TARGET_X86_
-#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
- // For PutArgStk of a TYP_SIMD12, we need an extra register.
- if (putArgStk->TypeGet() == TYP_SIMD12)
- {
- info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
- info->dstCount = 0;
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->allSIMDRegs());
- return;
- }
-#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
-
if (putArgStk->TypeGet() != TYP_STRUCT)
{
- TreeNodeInfoInitSimple(putArgStk);
return;
}
@@ -2223,21 +459,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
- bool haveLocalAddr = false;
- if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
- {
- srcAddr = src->gtOp.gtOp1;
- assert(srcAddr != nullptr);
- haveLocalAddr = srcAddr->OperIsLocalAddr();
- }
- else
- {
- assert(varTypeIsSIMD(putArgStk));
- }
-
- info->srcCount = src->gtLsraInfo.dstCount;
- info->dstCount = 0;
-
// In case of a CpBlk we could use a helper call. In case of putarg_stk we
// can't do that since the helper call could kill some already set up outgoing args.
// TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
@@ -2257,38 +478,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT && putArgStk->gtNumberReferenceSlots == 0)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- info->internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- info->setInternalCandidates(l, regMask);
- }
-
-#ifdef _TARGET_X86_
- if (size >= 8)
-#else // !_TARGET_X86_
- if (size >= XMM_REGSIZE_BYTES)
-#endif // !_TARGET_X86_
- {
- // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
- // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- info->internalFloatCount = 1;
- info->addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
#ifdef _TARGET_X86_
if (size < XMM_REGSIZE_BYTES)
{
@@ -2310,1486 +499,11 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
#endif // _TARGET_X86_
else
{
- info->internalIntCount += 3;
- info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
-
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
}
-
- // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStk, src);
-
- if (haveLocalAddr)
- {
- // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
- // copies.
- //
- // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it
- // afterwards.
- info->srcCount++;
- MakeSrcContained(putArgStk, srcAddr);
- info->srcCount--;
- }
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
- // Here '-' means don't care.
- //
- // Size?                    Init Memory?  # temp regs
- // 0                        -             0 (returns 0)
- // const and <=6 reg words  -             0 (pushes '0')
- // const and >6 reg words   Yes           0 (pushes '0')
- // const and <PageSize      No            0 (amd64) 1 (x86)
- //                                          (x86: tmpReg for subtracting from esp)
- // const and >=PageSize     No            2 (regCnt and tmpReg for subtracting from sp)
- // Non-const                Yes           0 (regCnt=targetReg and pushes '0')
- // Non-const                No            2 (regCnt and tmpReg for subtracting from sp)
- //
- // Note: Here we don't need the internal register to be different from targetReg.
- // Rather, we require it to be different from the operand's reg.
-
- GenTreePtr size = tree->gtOp.gtOp1;
- if (size->IsCnsIntOrI())
- {
- MakeSrcContained(tree, size);
-
- size_t sizeVal = size->gtIntCon.gtIconVal;
-
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
- {
- // Compute the amount of memory to properly STACK_ALIGN.
- // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
- // This should also help in debugging as we can examine the original size specified with localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
-
- // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
- // we will generate 'push 0'.
- assert((sizeVal % REGSIZE_BYTES) == 0);
- size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
- if (cntRegSizedWords <= 6)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
- {
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
- {
-#ifdef _TARGET_X86_
- info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
-#else // !_TARGET_X86_
- info->internalIntCount = 0;
-#endif // !_TARGET_X86_
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
- }
- }
- else
- {
- // >6 and need to zero initialize allocated stack space.
- info->internalIntCount = 0;
- }
- }
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- info->internalIntCount = 2;
- }
- else
- {
- info->internalIntCount = 0;
- }
- }
-}
-
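The temp-register table in TreeNodeInfoInitLclHeap above can be read as a small decision function. A sketch of the constant-size cases only (illustrative; REGSIZE_BYTES, STACK_ALIGN and the page size are placeholder AMD64-ish assumptions, not the JIT's actual target values):

#include <cstddef>
#include <cstdio>

const size_t REGSIZE_BYTES = 8;
const size_t STACK_ALIGN   = 16;
const size_t PAGE_SIZE     = 0x1000;

static size_t AlignUp(size_t size, size_t align)
{
    return (size + align - 1) & ~(align - 1);
}

// Temp registers needed for a constant-size localloc, per the table above.
static int TempRegsForConstLclHeap(size_t sizeVal, bool initMem, bool isX86)
{
    if (sizeVal == 0)
        return 0;                             // returns 0, no allocation
    sizeVal = AlignUp(sizeVal, STACK_ALIGN);
    if (sizeVal / REGSIZE_BYTES <= 6)
        return 0;                             // emitted as a series of 'push 0'
    if (initMem)
        return 0;                             // zero-init path pushes '0'
    if (sizeVal < PAGE_SIZE)
        return isX86 ? 1 : 0;                 // x86 needs a temp to avoid 'sub' on ESP
    return 2;                                 // regCnt and tmpReg for probing
}

int main()
{
    std::printf("%d %d %d\n",
                TempRegsForConstLclHeap(32, false, false),      // 0
                TempRegsForConstLclHeap(0x800, false, true),    // 1
                TempRegsForConstLclHeap(0x2000, false, false)); // 2
    return 0;
}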
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
-// as well as GT_ADD/GT_SUB.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- // We don't mark a constant on the left of the add as containable, so it gets
- // assigned a register, which has a CQ impact.
- // TODO-XArch-CQ: Detect this case and support generating a single instruction
- // for GT_ADD(Constant, SomeTree)
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // We can directly encode the second operand if it is either a containable constant or a memory-op.
- // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
- // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
- // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
- bool directlyEncodable = false;
- bool binOpInRMW = false;
- GenTreePtr operand = nullptr;
-
- if (IsContainableImmed(tree, op2))
- {
- directlyEncodable = true;
- operand = op2;
- }
- else
- {
- binOpInRMW = IsBinOpInRMWStoreInd(tree);
- if (!binOpInRMW)
- {
- if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
- {
- directlyEncodable = true;
- operand = op2;
- }
- else if (tree->OperIsCommutative())
- {
- if (IsContainableImmed(tree, op1) ||
- (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
- {
- // If it is safe, we can reverse the order of operands of commutative operations for efficient
- // codegen
- directlyEncodable = true;
- operand = op1;
- }
- }
- }
- }
-
- if (directlyEncodable)
- {
- assert(operand != nullptr);
- MakeSrcContained(tree, operand);
- }
- else if (!binOpInRMW)
- {
- // If this binary op neither has contained operands, nor is a
- // Read-Modify-Write (RMW) operation, we can mark its operands
- // as reg optional.
- SetRegOptionalForBinOp(tree);
- }
-
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- switch (tree->OperGet())
- {
- case GT_MOD:
- case GT_DIV:
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- // SSE2 allows only op2 to be a memory-op.
- SetRegOptional(op2);
- }
-
- return;
- }
- break;
-
- default:
- break;
- }
-
- // Amd64 Div/Idiv instruction:
- // Dividend in RAX:RDX and computes
- // Quotient in RAX, Remainder in RDX
-
- if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
- {
- // We are interested in just the remainder.
- // RAX is used as a trashable register during computation of remainder.
- info->setDstCandidates(l, RBM_RDX);
- }
- else
- {
- // We are interested in just the quotient.
- // RDX gets used as trashable register during computation of quotient
- info->setDstCandidates(l, RBM_RAX);
- }
-
- bool op2CanBeRegOptional = true;
-#ifdef _TARGET_X86_
- if (op1->OperGet() == GT_LONG)
- {
- // To avoid a reg move, we would like to have op1's low part in RAX and its high part in RDX.
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
-
- // Src count is actually 3, so increment.
- assert(op2->IsCnsIntOrI());
- assert(tree->OperGet() == GT_UMOD);
- info->srcCount++;
- op2CanBeRegOptional = false;
-
- // This situation also requires an internal register.
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
-
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
- }
- else
-#endif
- {
- // If possible, we would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
-
- // divisor can be an r/m, but the memory indirection must be of the same size as the divide
- if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
- {
- MakeSrcContained(tree, op2);
- }
- else if (op2CanBeRegOptional)
- {
- op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
-
- // If there are no containable operands, we can make an operand reg optional.
- // Div instruction allows only op2 to be a memory op.
- SetRegOptional(op2);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- // Both operand and its result must be of floating point type.
- GenTree* op1 = tree->gtGetOp1();
- assert(varTypeIsFloating(op1));
- assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Sqrt:
- if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op1);