summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/jit/codegenarmarch.cpp | 2
-rw-r--r-- src/jit/codegenlinear.cpp | 13
-rw-r--r-- src/jit/codegenxarch.cpp | 94
-rw-r--r-- src/jit/decomposelongs.cpp | 69
-rw-r--r-- src/jit/decomposelongs.h | 1
-rw-r--r-- src/jit/flowgraph.cpp | 4
-rw-r--r-- src/jit/gentree.cpp | 44
-rw-r--r-- src/jit/gentree.h | 44
-rw-r--r-- src/jit/gtlist.h | 2
-rw-r--r-- src/jit/lir.cpp | 13
-rw-r--r-- src/jit/lower.cpp | 39
-rw-r--r-- src/jit/lowerxarch.cpp | 10
-rw-r--r-- src/jit/lsra.cpp | 4
-rw-r--r-- src/jit/lsra.h | 2
-rw-r--r-- src/jit/lsraarm.cpp | 19
-rw-r--r-- src/jit/lsraarm64.cpp | 15
-rw-r--r-- src/jit/lsraarmarch.cpp | 9
-rw-r--r-- src/jit/lsraxarch.cpp | 214
-rw-r--r-- src/jit/nodeinfo.h | 4
-rw-r--r-- tests/src/JIT/SIMD/Matrix4x4.cs | 38
-rw-r--r-- tests/src/JIT/SIMD/Matrix4x4_r.csproj | 33
-rw-r--r-- tests/src/JIT/SIMD/Matrix4x4_ro.csproj | 33
-rw-r--r-- tests/src/JIT/SIMD/Plane.cs | 33
-rw-r--r-- tests/src/JIT/SIMD/Plane_r.csproj | 33
-rw-r--r-- tests/src/JIT/SIMD/Plane_ro.csproj | 33
25 files changed, 558 insertions, 247 deletions
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index e3966bbc08..6d4660fc13 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -541,8 +541,8 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
assert(treeNode->OperIs(GT_PUTARG_STK));
- var_types targetType = treeNode->TypeGet();
GenTreePtr source = treeNode->gtOp1;
+ var_types targetType = source->TypeGet();
emitter* emit = getEmitter();
// This is the varNum for our store operations,
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 1ee288b505..830183ed5a 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -398,11 +398,8 @@ void CodeGen::genCodeForBBlist()
// performed at the end of each block.
// TODO: could these checks be performed more frequently? E.g., at each location where
// the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
+ // be done by using the map maintained by LSRA (operandToLocationInfoMap) to mark a node
+ // somehow when, after the execution of that node, there will be no live non-variable registers.
regSet.rsSpillChk();
@@ -1375,15 +1372,12 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode,
regNumber srcReg,
regNumber sizeReg)
{
- assert(varTypeIsStruct(putArgNode));
-
// The putArgNode children are always contained. We should not consume any registers.
assert(putArgNode->gtGetOp1()->isContained());
- GenTree* dstAddr = putArgNode;
-
// Get the source address.
GenTree* src = putArgNode->gtGetOp1();
+ assert(varTypeIsStruct(src));
assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
GenTree* srcAddr = src->gtGetOp1();
@@ -1406,6 +1400,7 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode,
assert(dstReg != REG_SPBASE);
inst_RV_RV(INS_mov, dstReg, REG_SPBASE);
#else // !_TARGET_X86_
+ GenTree* dstAddr = putArgNode;
if (dstAddr->gtRegNum != dstReg)
{
// Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 99507ea2ea..ed18c69a87 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -3204,13 +3204,10 @@ unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree*
//
void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
{
+ GenTreePtr src = putArgNode->gtOp.gtOp1;
// We will never call this method for SIMD types, which are stored directly
// in genPutStructArgStk().
- noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
- GenTreePtr src = putArgNode->gtOp.gtOp1;
+ noway_assert(src->TypeGet() == TYP_STRUCT);
unsigned size = putArgNode->getArgSize();
assert(size <= CPBLK_UNROLL_LIMIT);
@@ -3328,14 +3325,12 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
//
void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode)
{
- assert(putArgNode->TypeGet() == TYP_STRUCT);
- assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
-
- // Make sure we got the arguments of the cpblk operation in the right registers
- GenTreePtr dstAddr = putArgNode;
GenTreePtr srcAddr = putArgNode->gtGetOp1();
+ assert(srcAddr->TypeGet() == TYP_STRUCT);
+ assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
- // Validate state.
+ // Make sure we got the arguments of the cpblk operation in the right registers, and that
+ // 'srcAddr' is contained as expected.
assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
assert(srcAddr->isContained());
@@ -3421,7 +3416,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
sourceIsLocal = true;
}
- bool dstOnStack = dstAddr->OperIsLocalAddr();
+ bool dstOnStack = dstAddr->gtSkipReloadOrCopy()->OperIsLocalAddr();
#ifdef DEBUG
@@ -5213,34 +5208,24 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
GenTreePtr arg = args->gtOp.gtOp1;
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
-#if defined(_TARGET_X86_)
- if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
+ if (arg->OperGet() == GT_PUTARG_STK)
{
+ GenTree* source = arg->gtOp.gtOp1;
+ ssize_t size = arg->AsPutArgStk()->getArgSize();
+ stackArgBytes += size;
+#ifdef DEBUG
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
assert(curArgTabEntry);
- stackArgBytes += curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
- }
- else
-#endif // defined(_TARGET_X86_)
-
+ assert(size == (curArgTabEntry->numSlots * TARGET_POINTER_SIZE));
#ifdef FEATURE_PUT_STRUCT_ARG_STK
- if (genActualType(arg->TypeGet()) == TYP_STRUCT)
- {
- assert(arg->OperGet() == GT_PUTARG_STK);
-
- GenTreeObj* obj = arg->gtGetOp1()->AsObj();
- unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
-#endif // DEBUG
- stackArgBytes += argBytes;
- }
- else
+ if (source->TypeGet() == TYP_STRUCT)
+ {
+ GenTreeObj* obj = source->AsObj();
+ unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
+ assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
+ }
#endif // FEATURE_PUT_STRUCT_ARG_STK
-
- {
- stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+#endif // DEBUG
}
}
args = args->gtOp.gtOp2;
@@ -7591,10 +7576,12 @@ void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias)
//
bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
+ const unsigned argSize = putArgStk->getArgSize();
+ GenTree* source = putArgStk->gtGetOp1();
+
#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(putArgStk))
+ if (!source->OperIs(GT_FIELD_LIST) && varTypeIsSIMD(source))
{
- const unsigned argSize = genTypeSize(putArgStk);
inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
AddStackLevel(argSize);
m_pushStkArg = false;
@@ -7602,8 +7589,6 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
}
#endif // FEATURE_SIMD
- const unsigned argSize = putArgStk->getArgSize();
-
// If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
// This is set in Lowering, and is true if and only if:
// - This argument contains any GC pointers OR
@@ -7616,13 +7601,11 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
case GenTreePutArgStk::Kind::RepInstr:
case GenTreePutArgStk::Kind::Unroll:
- assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
- (argSize >= 16));
+ assert((putArgStk->gtNumberReferenceSlots == 0) && (source->OperGet() != GT_FIELD_LIST) && (argSize >= 16));
break;
case GenTreePutArgStk::Kind::Push:
case GenTreePutArgStk::Kind::PushAllSlots:
- assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
- (argSize < 16));
+ assert((putArgStk->gtNumberReferenceSlots != 0) || (source->OperGet() == GT_FIELD_LIST) || (argSize < 16));
break;
case GenTreePutArgStk::Kind::Invalid:
default:
@@ -7866,26 +7849,21 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
//
void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
{
- var_types targetType = putArgStk->TypeGet();
+ GenTreePtr data = putArgStk->gtOp1;
+ var_types targetType = genActualType(data->TypeGet());
#ifdef _TARGET_X86_
genAlignStackBeforeCall(putArgStk);
- if (varTypeIsStruct(targetType))
+ if ((data->OperGet() != GT_FIELD_LIST) && varTypeIsStruct(targetType))
{
(void)genAdjustStackForPutArgStk(putArgStk);
genPutStructArgStk(putArgStk);
return;
}
- // The following logic is applicable for x86 arch.
- assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
-
- GenTreePtr data = putArgStk->gtOp1;
-
- // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LIST,
- // and the type of the putArgStk is TYP_VOID.
+ // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LISTs of TYP_INT.
assert(targetType != TYP_LONG);
const unsigned argSize = putArgStk->getArgSize();
@@ -7931,7 +7909,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
// Get argument offset on stack.
// Here we cross check that argument offset hasn't changed from lowering to codegen since
@@ -7944,8 +7921,6 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
#endif
- GenTreePtr data = putArgStk->gtOp1;
-
if (data->isContainedIntOrIImmed())
{
getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
@@ -8129,10 +8104,11 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
// For non tail calls this is the outgoingArgSpace.
void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
- var_types targetType = putArgStk->TypeGet();
+ GenTree* source = putArgStk->gtGetOp1();
+ var_types targetType = source->TypeGet();
#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
- if (targetType == TYP_SIMD12)
+ if (putArgStk->isSIMD12())
{
genPutArgStkSIMD12(putArgStk);
return;
@@ -8141,7 +8117,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
if (varTypeIsSIMD(targetType))
{
- regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
+ regNumber srcReg = genConsumeReg(source);
assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
genStoreRegToStackArg(targetType, srcReg, 0);
return;
@@ -8180,7 +8156,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
// future.
assert(m_pushStkArg);
- GenTree* srcAddr = putArgStk->gtGetOp1()->gtGetOp1();
+ GenTree* srcAddr = source->gtGetOp1();
BYTE* gcPtrs = putArgStk->gtGcPtrs;
const unsigned numSlots = putArgStk->gtNumSlots;
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 1a78533cea..32c8bf729d 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -274,31 +274,7 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
// element into two elements: one for each half of the GT_LONG.
if ((use.Def()->OperGet() == GT_LONG) && !use.IsDummyUse() && (use.User()->OperGet() == GT_FIELD_LIST))
{
- GenTreeOp* value = use.Def()->AsOp();
- Range().Remove(value);
-
- // The node returned by `use.User()` is the head of the field list. We need to find the actual node that uses
- // the `GT_LONG` so that we can split it.
- GenTreeFieldList* listNode = use.User()->AsFieldList();
- for (; listNode != nullptr; listNode = listNode->Rest())
- {
- if (listNode->Current() == value)
- {
- break;
- }
- }
-
- assert(listNode != nullptr);
- GenTree* rest = listNode->gtOp2;
-
- GenTreeFieldList* loNode = listNode;
- loNode->gtOp1 = value->gtOp1;
- loNode->gtFieldType = TYP_INT;
-
- GenTreeFieldList* hiNode =
- new (m_compiler, GT_FIELD_LIST) GenTreeFieldList(value->gtOp2, loNode->gtFieldOffset + 4, TYP_INT, loNode);
-
- hiNode->gtOp2 = rest;
+ DecomposeFieldList(use.User()->AsFieldList(), use.Def()->AsOp());
}
#ifdef DEBUG
@@ -725,6 +701,49 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
}
//------------------------------------------------------------------------
+// DecomposeFieldList: Decompose a GT_LONG that is an element of a GT_FIELD_LIST.
+//
+// Arguments:
+// listNode - the head of the FIELD_LIST that contains the given GT_LONG.
+// longNode - the GT_LONG node to decompose; it is removed from Range() by this method.
+//
+// Return Value:
+// The gtNext of the list element that referenced 'longNode' (intended as the next node to process).
+//
+// Notes:
+// Split a LONG field list element into two TYP_INT elements: the existing element is reused for
+// longNode->gtOp1, and a new element at gtFieldOffset + 4 is created for longNode->gtOp2.
+GenTree* DecomposeLongs::DecomposeFieldList(GenTreeFieldList* listNode, GenTreeOp* longNode)
+{
+ assert(longNode->OperGet() == GT_LONG);
+ // We are given the head of the field list. We need to find the actual node that uses
+ // the `GT_LONG` so that we can split it.
+ for (; listNode != nullptr; listNode = listNode->Rest())
+ {
+ if (listNode->Current() == longNode)
+ {
+ break;
+ }
+ }
+ assert(listNode != nullptr); // The GT_LONG must be an element of the given list.
+
+ Range().Remove(longNode); // The GT_LONG itself is dropped; its two operands are used directly below.
+
+ GenTree* rest = listNode->gtOp2; // Remember the remainder of the list so it can be re-attached below.
+
+ GenTreeFieldList* loNode = listNode; // Reuse the existing element for the first half (gtOp1 of the GT_LONG).
+ loNode->gtType = TYP_INT;
+ loNode->gtOp1 = longNode->gtOp1;
+ loNode->gtFieldType = TYP_INT;
+
+ GenTreeFieldList* hiNode =
+ new (m_compiler, GT_FIELD_LIST) GenTreeFieldList(longNode->gtOp2, loNode->gtFieldOffset + 4, TYP_INT, loNode); // loNode is passed as the previous element; presumably the constructor links the new node after it - TODO confirm.
+ hiNode->gtOp2 = rest; // Re-attach the remainder of the list after the new element.
+
+ return listNode->gtNext; // NOTE(review): 'listNode' may no longer be the list head at this point; confirm gtNext is meaningful for a non-head element. The call in DecomposeNode shown in this change ignores the result.
+}
+
+//------------------------------------------------------------------------
// DecomposeCall: Decompose GT_CALL.
//
// Arguments:
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index 7a0d6ff5ba..c008fa255c 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -45,6 +45,7 @@ private:
GenTree* DecomposeStoreLclFld(LIR::Use& use);
GenTree* DecomposeCast(LIR::Use& use);
GenTree* DecomposeCnsLng(LIR::Use& use);
+ GenTree* DecomposeFieldList(GenTreeFieldList* listNode, GenTreeOp* longNode);
GenTree* DecomposeCall(LIR::Use& use);
GenTree* DecomposeInd(LIR::Use& use);
GenTree* DecomposeStoreInd(LIR::Use& use);
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 0f9a28c13b..057de240f5 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -14465,6 +14465,10 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi
LIR::Range& blockRange = LIR::AsRange(block);
GenTree* jmp = blockRange.LastNode();
assert(jmp->OperIsConditionalJump());
+ if (jmp->OperGet() == GT_JTRUE)
+ {
+ jmp->gtOp.gtOp1->gtFlags &= ~GTF_SET_FLAGS;
+ }
bool isClosed;
unsigned sideEffects;
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index e6a21f4fa9..31ac9104c8 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -1089,6 +1089,13 @@ bool GenTreeCall::AreArgsComplete() const
return false;
}
+#if !defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(LEGACY_BACKEND)
+unsigned GenTreePutArgStk::getArgSize() // Returns the size of this stack argument, derived from its operand (gtOp1).
+{
+ return genTypeSize(genActualType(gtOp1->gtType)); // genTypeSize of the operand's actual type (genActualType), not of the PUTARG_STK node itself.
+}
+#endif // !defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(LEGACY_BACKEND)
+
/*****************************************************************************
*
* Returns non-zero if the two trees are identical.
@@ -11139,25 +11146,28 @@ void Compiler::gtDispTree(GenTreePtr tree,
tree->AsFieldList()->gtFieldOffset);
}
#if FEATURE_PUT_STRUCT_ARG_STK
- else if ((tree->OperGet() == GT_PUTARG_STK) &&
- (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid))
+ else if (tree->OperGet() == GT_PUTARG_STK)
{
- switch (tree->AsPutArgStk()->gtPutArgStkKind)
+ printf(" (%d slots)", tree->AsPutArgStk()->gtNumSlots);
+ if (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid)
{
- case GenTreePutArgStk::Kind::RepInstr:
- printf(" (RepInstr)");
- break;
- case GenTreePutArgStk::Kind::Unroll:
- printf(" (Unroll)");
- break;
- case GenTreePutArgStk::Kind::Push:
- printf(" (Push)");
- break;
- case GenTreePutArgStk::Kind::PushAllSlots:
- printf(" (PushAllSlots)");
- break;
- default:
- unreached();
+ switch (tree->AsPutArgStk()->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreePutArgStk::Kind::Unroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ printf(" (Push)");
+ break;
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ printf(" (PushAllSlots)");
+ break;
+ default:
+ unreached();
+ }
}
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 7fe616d892..37deba98ab 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1122,9 +1122,12 @@ public:
return false;
}
- if (gtOper == GT_NOP || gtOper == GT_CALL)
+ if (gtType == TYP_VOID)
{
- return gtType != TYP_VOID;
+ // These are the only operators which can produce either VOID or non-VOID results.
+ assert(OperIs(GT_NOP, GT_CALL, GT_LOCKADD, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
+ OperIsSIMD());
+ return false;
}
if (gtOper == GT_FIELD_LIST)
@@ -1177,13 +1180,13 @@ public:
inline void ClearUnusedValue();
inline bool IsUnusedValue() const;
- bool OperIs(genTreeOps oper)
+ bool OperIs(genTreeOps oper) const
{
return OperGet() == oper;
}
template <typename... T>
- bool OperIs(genTreeOps oper, T... rest)
+ bool OperIs(genTreeOps oper, T... rest) const
{
return OperIs(oper) || OperIs(rest...);
}
@@ -1588,7 +1591,7 @@ public:
return OperIsAtomicOp(gtOper);
}
- // This is basically here for cleaner FEATURE_SIMD #ifdefs.
+ // This is here for cleaner FEATURE_SIMD #ifdefs.
static bool OperIsSIMD(genTreeOps gtOper)
{
#ifdef FEATURE_SIMD
@@ -1598,11 +1601,26 @@ public:
#endif // !FEATURE_SIMD
}
- bool OperIsSIMD()
+ bool OperIsSIMD() const
{
return OperIsSIMD(gtOper);
}
+ // Returns true if 'gtOper' is GT_LONG. This is here for cleaner GT_LONG #ifdefs: callers need no target checks of their own.
+ static bool OperIsLong(genTreeOps gtOper)
+ {
+#if defined(_TARGET_64BIT_) || defined(LEGACY_BACKEND)
+ return false; // Always false when _TARGET_64BIT_ or LEGACY_BACKEND is defined.
+#else
+ return gtOper == GT_LONG;
+#endif
+ }
+
+ bool OperIsLong() const // Instance form: applies the static check to this node's gtOper.
+ {
+ return OperIsLong(gtOper);
+ }
+
bool OperIsFieldListHead()
{
return (gtOper == GT_FIELD_LIST) && ((gtFlags & GTF_FIELD_LIST_HEAD) != 0);
@@ -3145,6 +3163,7 @@ struct GenTreeFieldList : public GenTreeArgList
assert(!arg->OperIsAnyList());
gtFieldOffset = fieldOffset;
gtFieldType = fieldType;
+ gtType = fieldType;
if (prevList == nullptr)
{
gtFlags |= GTF_FIELD_LIST_HEAD;
@@ -3958,7 +3977,7 @@ struct GenTreeMultiRegOp : public GenTreeOp
{
regNumber gtOtherReg;
- // GTF_SPILL or GTF_SPILLED flag on a multi-reg call node indicates that one or
+ // GTF_SPILL or GTF_SPILLED flag on a multi-reg node indicates that one or
// more of its result regs are in that state. The spill flag of each of the
// return register is stored here. We only need 2 bits per returned register,
// so this is treated as a 2-bit array. No architecture needs more than 8 bits.
@@ -5231,6 +5250,13 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtNumSlots * TARGET_POINTER_SIZE;
}
+ // Return true if this is a PutArgStk of a SIMD12 struct.
+ // This is needed because such values are re-typed to SIMD16, and the type of PutArgStk is VOID.
+ // The check is: the operand (gtOp1) has a SIMD type and the argument occupies exactly 3 slots.
+ // The result is a predicate, so it is declared 'bool' rather than 'unsigned'; callers that
+ // store it in an integer still work through the implicit bool-to-integer conversion.
+ bool isSIMD12()
+ {
+ return (varTypeIsSIMD(gtOp1) && (gtNumSlots == 3));
+ }
+
//------------------------------------------------------------------------
// setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
//
@@ -5272,7 +5298,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
unsigned gtNumberReferenceSlots; // Number of reference slots.
BYTE* gtGcPtrs; // gcPointers
-#endif // FEATURE_PUT_STRUCT_ARG_STK
+#elif !defined(LEGACY_BACKEND)
+ unsigned getArgSize();
+#endif // !LEGACY_BACKEND
#if defined(DEBUG) || defined(UNIX_X86_ABI)
GenTreeCall* gtCall; // the call node to which this argument belongs
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 6ca636df20..26863deb14 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -57,7 +57,7 @@ GTNODE(ARR_LENGTH , GenTreeArrLen ,0,GTK_UNOP|GTK_EXOP) // array
GTNODE(INTRINSIC , GenTreeIntrinsic ,0,GTK_BINOP|GTK_EXOP) // intrinsics
-GTNODE(LOCKADD , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE)
+GTNODE(LOCKADD , GenTreeOp ,0,GTK_BINOP)
GTNODE(XADD , GenTreeOp ,0,GTK_BINOP)
GTNODE(XCHG , GenTreeOp ,0,GTK_BINOP)
GTNODE(CMPXCHG , GenTreeCmpXchg ,0,GTK_SPECIAL)
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
index a2343ad313..80b9c34f2c 100644
--- a/src/jit/lir.cpp
+++ b/src/jit/lir.cpp
@@ -981,7 +981,11 @@ void LIR::Range::Remove(GenTree* node, bool markOperandsUnused)
if (markOperandsUnused)
{
node->VisitOperands([](GenTree* operand) -> GenTree::VisitResult {
- operand->SetUnusedValue();
+ // The operand of JTRUE does not produce a value (just sets the flags).
+ if (operand->IsValue())
+ {
+ operand->SetUnusedValue();
+ }
return GenTree::VisitResult::Continue;
});
}
@@ -1608,8 +1612,11 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
// The GT_NOP case is because sometimes we eliminate stack argument stores as dead, but
// instead of removing them we replace with a NOP.
// ARGPLACE nodes are not represented in the LIR sequence. Ignore them.
- assert((node->OperGet() == GT_CALL) &&
- (def->OperIsStore() || def->OperIs(GT_PUTARG_STK, GT_NOP, GT_ARGPLACE)));
+ // The argument of a JTRUE doesn't produce a value (just sets a flag).
+ assert(((node->OperGet() == GT_CALL) &&
+ (def->OperIsStore() || def->OperIs(GT_PUTARG_STK, GT_NOP, GT_ARGPLACE))) ||
+ ((node->OperGet() == GT_JTRUE) && (def->TypeGet() == TYP_VOID) &&
+ ((def->gtFlags & GTF_SET_FLAGS) != 0)));
continue;
}
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index d2f2bb05a4..ed608e965c 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1079,11 +1079,29 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// This provides the info to put this argument in in-coming arg area slot
// instead of in out-going arg area slot.
- PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct
+ // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce
+ // a result. So the type of its operand must be the correct type to push on the stack.
+ // For a FIELD_LIST, this will be the type of the field (not the type of the arg),
+ // but otherwise it is generally the type of the operand.
+ PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type)));
+ if ((arg->OperGet() != GT_FIELD_LIST))
+ {
+#if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
+ if (type == TYP_SIMD12)
+ {
+ assert(info->numSlots == 3);
+ }
+ else
+#endif // defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
+ {
+ assert(genActualType(arg->TypeGet()) == type);
+ }
+ }
- putArg = new (comp, GT_PUTARG_STK)
- GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
- call->IsFastTailCall(), call);
+ putArg =
+ new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg,
+ info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
+ call->IsFastTailCall(), call);
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
@@ -1266,7 +1284,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
// Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
(void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg = NewPutArg(call, fieldList, info, type);
BlockRange().InsertBefore(arg, putArg);
BlockRange().Remove(arg);
@@ -1275,17 +1293,17 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
}
else
{
+ assert(arg->OperGet() == GT_LONG);
// For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
// Although the hi argument needs to be pushed first, that will be handled by the general case,
// in which the fields will be reversed.
- noway_assert(arg->OperGet() == GT_LONG);
assert(info->numSlots == 2);
GenTreePtr argLo = arg->gtGetOp1();
GenTreePtr argHi = arg->gtGetOp2();
GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
// Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
(void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+ putArg = NewPutArg(call, fieldList, info, type);
putArg->gtRegNum = info->regNum;
// We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
@@ -2757,7 +2775,7 @@ GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue)
relop->SetOper(GT_JCMP);
relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ);
relop->gtFlags |= flags;
- relop->gtLsraInfo.isNoRegCompare = true;
+ relop->gtType = TYP_VOID;
relopOp2->SetContained();
@@ -5736,8 +5754,9 @@ void Lowering::ContainCheckRet(GenTreeOp* ret)
void Lowering::ContainCheckJTrue(GenTreeOp* node)
{
// The compare does not need to be generated into a register.
- GenTree* cmp = node->gtGetOp1();
- cmp->gtLsraInfo.isNoRegCompare = true;
+ GenTree* cmp = node->gtGetOp1();
+ cmp->gtType = TYP_VOID;
+ cmp->gtFlags |= GTF_SET_FLAGS;
}
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 415c10619b..f509475fe1 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -474,8 +474,9 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
BlockRange().InsertAfter(fieldList, head);
BlockRange().Remove(fieldList);
- fieldList = head;
- putArgStk->gtOp1 = fieldList;
+ fieldList = head;
+ putArgStk->gtOp1 = fieldList;
+ putArgStk->gtType = fieldList->gtType;
}
// Now that the fields have been sorted, determine the kind of code we will generate.
@@ -557,7 +558,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
#ifdef FEATURE_PUT_STRUCT_ARG_STK
- if (putArgStk->TypeGet() != TYP_STRUCT)
+ if (src->TypeGet() != TYP_STRUCT)
#endif // FEATURE_PUT_STRUCT_ARG_STK
{
// If the child of GT_PUTARG_STK is a constant, we don't need a register to
@@ -858,8 +859,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
}
simdNode->gtFlags |= GTF_SET_FLAGS;
- simdNode->SetUnusedValue();
- simdNode->gtLsraInfo.isNoRegCompare = true;
+ simdNode->gtType = TYP_VOID;
}
#endif
ContainCheckSIMD(simdNode);
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 3fa7905384..c7a5b6c7b6 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -3598,7 +3598,6 @@ static int ComputeOperandDstCount(GenTree* operand)
// pointers to argument setup stores.
return 0;
}
-#ifdef _TARGET_ARMARCH_
else if (operand->OperIsPutArgStk())
{
// A PUTARG_STK argument is an operand of a call, but is neither contained, nor does it produce
@@ -3606,7 +3605,6 @@ static int ComputeOperandDstCount(GenTree* operand)
assert(!operand->isContained());
return 0;
}
-#endif // _TARGET_ARMARCH_
else
{
// If a field list or non-void-typed operand is not an unused value and does not have source registers,
@@ -4840,7 +4838,7 @@ void LinearScan::buildIntervals()
TreeNodeInfoInit(node);
// If the node produces an unused value, mark it as a local def-use
- if (node->IsValue() && node->IsUnusedValue() && !node->gtLsraInfo.isNoRegCompare)
+ if (node->IsValue() && node->IsUnusedValue())
{
node->gtLsraInfo.isLocalDefUse = true;
node->gtLsraInfo.dstCount = 0;
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index d149b3207a..3f06e4e61f 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -1225,7 +1225,7 @@ private:
void TreeNodeInfoInitCheckByteable(GenTree* tree);
- void SetDelayFree(GenTree* delayUseSrc);
+ bool CheckAndSetDelayFree(GenTree* delayUseSrc);
void TreeNodeInfoInitSimple(GenTree* tree);
int GetOperandSourceCount(GenTree* node);
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index e8f70475c4..83b150b7eb 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -214,7 +214,18 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
}
// Set the default dstCount. This may be modified below.
- info->dstCount = tree->IsValue() ? 1 : 0;
+ if (tree->IsValue())
+ {
+ info->dstCount = 1;
+ if (tree->IsUnusedValue())
+ {
+ info->isLocalDefUse = true;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
switch (tree->OperGet())
{
@@ -442,6 +453,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
case GT_LONG:
assert(tree->IsUnusedValue()); // Contained nodes are already processed, only unused GT_LONG can reach here.
+ // An unused GT_LONG doesn't produce any registers.
+ tree->gtType = TYP_VOID;
+ tree->ClearUnusedValue();
+ info->isLocalDefUse = false;
// An unused GT_LONG node needs to consume its sources.
info->srcCount = 2;
@@ -796,6 +811,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
}
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || tree->IsMultiRegNode());
+ assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (info->dstCount != 0));
}
#endif // _TARGET_ARM_
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index c67a1c50d3..f59e6bdf33 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -58,7 +58,18 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
}
// Set the default dstCount. This may be modified below.
- info->dstCount = tree->IsValue() ? 1 : 0;
+ if (tree->IsValue())
+ {
+ info->dstCount = 1;
+ if (tree->IsUnusedValue())
+ {
+ info->isLocalDefUse = true;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
switch (tree->OperGet())
{
@@ -697,6 +708,8 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
}
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || tree->IsMultiRegCall());
+ assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (info->dstCount != 0));
}
//------------------------------------------------------------------------
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index ffa25a0bb4..b3e9bfa054 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -92,15 +92,8 @@ void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
+ assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- if (info->isNoRegCompare)
- {
- info->dstCount = 0;
- }
- else
- {
- assert((info->dstCount == 1) || tree->OperIs(GT_CMP, GT_TEST_EQ, GT_TEST_NE));
- }
}
void LinearScan::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 24f7639e8b..4d5c1f7304 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -116,12 +116,22 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
{
info->dstCount = 0;
assert(info->srcCount == 0);
- TreeNodeInfoInitCheckByteable(tree);
return;
}
// Set the default dstCount. This may be modified below.
- info->dstCount = tree->IsValue() ? 1 : 0;
+ if (tree->IsValue())
+ {
+ info->dstCount = 1;
+ if (tree->IsUnusedValue())
+ {
+ info->isLocalDefUse = true;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
// floating type generates AVX instruction (vmovss etc.), set the flag
SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
@@ -150,6 +160,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
info->regOptional = false;
tree->SetContained();
info->dstCount = 0;
+ return;
}
}
__fallthrough;
@@ -194,6 +205,10 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
case GT_LONG:
assert(tree->IsUnusedValue()); // Contained nodes are already processed, only unused GT_LONG can reach here.
+ // An unused GT_LONG doesn't produce any registers.
+ tree->gtType = TYP_VOID;
+ tree->ClearUnusedValue();
+ info->isLocalDefUse = false;
// An unused GT_LONG node needs to consume its sources.
info->srcCount = 2;
@@ -439,7 +454,7 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(this, RBM_RAX);
tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtLsraInfo.setDstCandidates(this, RBM_RAX);
+ info->setDstCandidates(this, RBM_RAX);
break;
case GT_LOCKADD:
@@ -632,16 +647,18 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
// Commutative opers like add/mul/and/or/xor could reverse the order of
// operands if it is safe to do so. In such a case we would like op2 to be
// target preferenced instead of op1.
- if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
+ if (tree->OperIsCommutative() && op1->isContained() && op2 != nullptr)
{
op1 = op2;
op2 = tree->gtOp.gtOp1;
}
- // If we have a read-modify-write operation, we want to preference op1 to the target.
- // If op1 is contained, we don't want to preference it, but it won't
- // show up as a source in that case, so it will be ignored.
- op1->gtLsraInfo.isTgtPref = true;
+ // If we have a read-modify-write operation, we want to preference op1 to the target,
+ // if it is not contained.
+ if (!op1->isContained())
+ {
+ op1->gtLsraInfo.isTgtPref = true;
+ }
// Is this a non-commutative operator, or is op2 a contained memory op?
// In either case, we need to make op2 remain live until the op is complete, by marking
@@ -672,29 +689,28 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
// which allows its second operand to be a contained
// immediate whereas xadd instruction requires its
// second operand to be in a register.
- assert(tree->gtLsraInfo.dstCount == 0);
-
- // Give it an artificial type and mark it isLocalDefUse = true.
- // This would result in a Def position created but not considered
- // consumed by its parent node.
- tree->gtType = TYP_INT;
- tree->gtLsraInfo.isLocalDefUse = true;
+ assert(info->dstCount == 0);
+
+ // Give it an artificial type and mark it as an unused value.
+ // This results in a Def position created but not considered consumed by its parent node.
+ tree->gtType = TYP_INT;
+ info->dstCount = 1;
+ info->isLocalDefUse = true;
+ tree->SetUnusedValue();
}
else
{
- assert(tree->gtLsraInfo.dstCount != 0);
+ assert(info->dstCount != 0);
}
delayUseSrc = op1;
}
- else if ((op2 != nullptr) &&
- (!tree->OperIsCommutative() || (isContainableMemoryOp(op2) && (op2->gtLsraInfo.srcCount == 0))))
+ else if ((op2 != nullptr) && (!tree->OperIsCommutative() || (op2->isContained() && !op2->IsCnsIntOrI())))
{
delayUseSrc = op2;
}
- if (delayUseSrc != nullptr)
+ if ((delayUseSrc != nullptr) && CheckAndSetDelayFree(delayUseSrc))
{
- SetDelayFree(delayUseSrc);
info->hasDelayFreeSrc = true;
}
}
@@ -702,36 +718,55 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitCheckByteable(tree);
- if (tree->IsUnusedValue() && (info->dstCount != 0))
- {
- info->isLocalDefUse = true;
- }
// We need to be sure that we've set info->srcCount and info->dstCount appropriately
assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+ assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
+ assert(!tree->IsUnusedValue() || (info->dstCount != 0));
}
-void LinearScan::SetDelayFree(GenTree* delayUseSrc)
+//---------------------------------------------------------------------
+// CheckAndSetDelayFree - Set isDelayFree on the given operand or its child(ren), if appropriate
+//
+// Arguments:
+// delayUseSrc - a node that may have a delayed use
+//
+// Return Value:
+// True iff the node or one of its children has been marked isDelayFree
+//
+// Notes:
+// Only register operands should be marked isDelayFree, not contained immediates or memory.
+//
+bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc)
{
// If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
// on the base & index, if any.
// Otherwise, we set it on delayUseSrc itself.
- if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
+ bool returnValue = false;
+ if (delayUseSrc->isContained())
{
- GenTree* base = delayUseSrc->AsIndir()->Base();
- GenTree* index = delayUseSrc->AsIndir()->Index();
- if (base != nullptr)
- {
- base->gtLsraInfo.isDelayFree = true;
- }
- if (index != nullptr)
+ // If delayUseSrc is a non-Indir contained node (e.g. a local) there's no register use to delay.
+ if (delayUseSrc->isIndir())
{
- index->gtLsraInfo.isDelayFree = true;
+ GenTree* base = delayUseSrc->AsIndir()->Base();
+ GenTree* index = delayUseSrc->AsIndir()->Index();
+ if (base != nullptr)
+ {
+ base->gtLsraInfo.isDelayFree = true;
+ returnValue = true;
+ }
+ if (index != nullptr)
+ {
+ index->gtLsraInfo.isDelayFree = true;
+ returnValue = true;
+ }
}
}
else
{
delayUseSrc->gtLsraInfo.isDelayFree = true;
+ returnValue = true;
}
+ return returnValue;
}
//------------------------------------------------------------------------
@@ -886,7 +921,8 @@ void LinearScan::TreeNodeInfoInitSimple(GenTree* tree)
void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
- GenTree* op1 = tree->gtGetOp1();
+ assert(info->dstCount == 0);
+ GenTree* op1 = tree->gtGetOp1();
#if !defined(_TARGET_64BIT_)
if (tree->TypeGet() == TYP_LONG)
@@ -897,15 +933,14 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
info->srcCount = 2;
loVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_LO);
hiVal->gtLsraInfo.setSrcCandidates(this, RBM_LNGRET_HI);
- assert(info->dstCount == 0);
}
else
#endif // !defined(_TARGET_64BIT_)
+ if ((tree->TypeGet() != TYP_VOID) && !op1->isContained())
{
regMaskTP useCandidates = RBM_NONE;
- info->srcCount = ((tree->TypeGet() == TYP_VOID) || op1->isContained()) ? 0 : 1;
- assert(info->dstCount == 0);
+ info->srcCount = 1;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(tree))
@@ -926,8 +961,7 @@ void LinearScan::TreeNodeInfoInitReturn(GenTree* tree)
switch (tree->TypeGet())
{
case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
+ unreached();
case TYP_FLOAT:
useCandidates = RBM_FLOATRET;
break;
@@ -975,56 +1009,60 @@ void LinearScan::TreeNodeInfoInitShiftRotate(GenTree* tree)
// We will allow whatever can be encoded - hope you know what you are doing.
if (!shiftBy->isContained())
{
- source->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
shiftBy->gtLsraInfo.setSrcCandidates(this, RBM_RCX);
- info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
- if (!tree->isContained())
+ if (!source->isContained())
{
- info->srcCount = 2;
- }
- }
- else
- {
- // Note that Rotate Left/Right instructions don't set ZF and SF flags.
- //
- // If the operand being shifted is 32-bits then upper three bits are masked
- // by hardware to get actual shift count. Similarly for 64-bit operands
- // shift count is narrowed to [0..63]. If the resulting shift count is zero,
- // then shift operation won't modify flags.
- //
- // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
- // if the shift count is known to be non-zero and in the range depending on the
- // operand size.
- if (!tree->isContained())
- {
- info->srcCount = 1;
+ source->gtLsraInfo.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
}
+ info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
}
-#ifdef _TARGET_X86_
- // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ // Note that Rotate Left/Right instructions don't set ZF and SF flags.
+ //
+ // If the operand being shifted is 32-bits then upper three bits are masked
+ // by hardware to get actual shift count. Similarly for 64-bit operands
+ // shift count is narrowed to [0..63]. If the resulting shift count is zero,
+ // then shift operation won't modify flags.
+ //
+ // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
+ // if the shift count is known to be non-zero and in the range depending on the
+ // operand size.
+
+ if (!tree->isContained())
{
- assert((source->OperGet() == GT_LONG) && source->isContained());
+#ifdef _TARGET_X86_
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert((source->OperGet() == GT_LONG) && source->isContained());
- info->srcCount++;
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ sourceLo->gtLsraInfo.isDelayFree = true;
+ }
+ else
+ {
+ GenTreePtr sourceHi = source->gtOp.gtOp2;
+ sourceHi->gtLsraInfo.isDelayFree = true;
+ }
- if (tree->OperGet() == GT_LSH_HI)
- {
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
+ source->gtLsraInfo.hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
+ info->srcCount += 2;
}
else
+#endif
+ if (!source->isContained())
{
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
+ info->srcCount++;
+ }
+ if (!shiftBy->isContained())
+ {
+ info->srcCount++;
}
-
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
}
-#endif
}
//------------------------------------------------------------------------
@@ -1649,9 +1687,12 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
#endif // _TARGET_X86_
}
+ GenTreePtr src = putArgStk->gtOp1;
+ var_types type = src->TypeGet();
+
#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
// For PutArgStk of a TYP_SIMD12, we need an extra register.
- if (putArgStk->TypeGet() == TYP_SIMD12)
+ if (putArgStk->isSIMD12())
{
info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
info->internalFloatCount = 1;
@@ -1660,14 +1701,13 @@ void LinearScan::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
}
#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
- if (putArgStk->TypeGet() != TYP_STRUCT)
+ if (type != TYP_STRUCT)
{
TreeNodeInfoInitSimple(putArgStk);
return;
}
GenTreePtr dst = putArgStk;
- GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
info->srcCount = GetOperandSourceCount(src);
@@ -2607,14 +2647,7 @@ void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 0;
- if (info->isNoRegCompare)
- {
- info->dstCount = 0;
- }
- else
- {
- assert((info->dstCount == 1) || tree->OperIs(GT_CMP));
- }
+ assert((info->dstCount == 1) || (tree->TypeGet() == TYP_VOID));
#ifdef _TARGET_X86_
// If the compare is used by a jump, we just need to set the condition codes. If not, then we need
@@ -2630,7 +2663,7 @@ void LinearScan::TreeNodeInfoInitCmp(GenTreePtr tree)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- if (!op1->gtLsraInfo.isNoRegCompare)
+ if (op1->TypeGet() != TYP_VOID)
{
info->srcCount += GetOperandSourceCount(op1);
}
@@ -2718,9 +2751,8 @@ void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
{
containedMemOp = op2;
}
- if (containedMemOp != nullptr)
+ if ((containedMemOp != nullptr) && CheckAndSetDelayFree(containedMemOp))
{
- SetDelayFree(containedMemOp);
info->hasDelayFreeSrc = true;
}
}
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
index 3f8532bd37..5f03da2776 100644
--- a/src/jit/nodeinfo.h
+++ b/src/jit/nodeinfo.h
@@ -32,7 +32,6 @@ public:
regOptional = false;
definesAnyRegisters = false;
isInternalRegDelayFree = false;
- isNoRegCompare = false;
#ifdef DEBUG
isInitialized = false;
#endif
@@ -145,9 +144,6 @@ public:
// in which result is produced.
unsigned char isInternalRegDelayFree : 1;
- // True if this is a compare feeding a JTRUE that doesn't need to be generated into a register.
- unsigned char isNoRegCompare : 1;
-
#ifdef DEBUG
// isInitialized is set when the tree node is handled.
unsigned char isInitialized : 1;
diff --git a/tests/src/JIT/SIMD/Matrix4x4.cs b/tests/src/JIT/SIMD/Matrix4x4.cs
new file mode 100644
index 0000000000..f3b37ab833
--- /dev/null
+++ b/tests/src/JIT/SIMD/Matrix4x4.cs
@@ -0,0 +1,38 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+using System;
+using System.Numerics;
+
+class Matrix4x4Test
+{
+ private const int Pass = 100;
+ private const int Fail = -1;
+
+ public static int Matrix4x4CreateScaleCenterTest3()
+ {
+ int returnVal = Pass;
+ Vector3 scale = new Vector3(3, 4, 5);
+ Vector3 center = new Vector3(23, 42, 666);
+
+ Matrix4x4 scaleAroundZero = Matrix4x4.CreateScale(scale.X, scale.Y, scale.Z, Vector3.Zero);
+ Matrix4x4 scaleAroundZeroExpected = Matrix4x4.CreateScale(scale.X, scale.Y, scale.Z);
+ if (!scaleAroundZero.Equals(scaleAroundZeroExpected))
+ {
+ returnVal = Fail;
+ }
+
+ Matrix4x4 scaleAroundCenter = Matrix4x4.CreateScale(scale.X, scale.Y, scale.Z, center);
+ Matrix4x4 scaleAroundCenterExpected = Matrix4x4.CreateTranslation(-center) * Matrix4x4.CreateScale(scale.X, scale.Y, scale.Z) * Matrix4x4.CreateTranslation(center);
+ if (!scaleAroundCenter.Equals(scaleAroundCenterExpected))
+ {
+ returnVal = Fail;
+ }
+ return returnVal;
+ }
+
+ static int Main()
+ {
+ return Matrix4x4CreateScaleCenterTest3();
+ }
+}
diff --git a/tests/src/JIT/SIMD/Matrix4x4_r.csproj b/tests/src/JIT/SIMD/Matrix4x4_r.csproj
new file mode 100644
index 0000000000..1cfe4dc8f3
--- /dev/null
+++ b/tests/src/JIT/SIMD/Matrix4x4_r.csproj
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Matrix4x4.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/SIMD/Matrix4x4_ro.csproj b/tests/src/JIT/SIMD/Matrix4x4_ro.csproj
new file mode 100644
index 0000000000..c5718069d7
--- /dev/null
+++ b/tests/src/JIT/SIMD/Matrix4x4_ro.csproj
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Matrix4x4.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/SIMD/Plane.cs b/tests/src/JIT/SIMD/Plane.cs
new file mode 100644
index 0000000000..6cc9c4a07d
--- /dev/null
+++ b/tests/src/JIT/SIMD/Plane.cs
@@ -0,0 +1,33 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+using System;
+using System.Numerics;
+
+class PlaneTest
+{
+ private const int Pass = 100;
+ private const int Fail = -1;
+
+ public static int PlaneCreateFromVerticesTest()
+ {
+ int returnVal = Pass;
+
+ Vector3 point1 = new Vector3(0.0f, 1.0f, 1.0f);
+ Vector3 point2 = new Vector3(0.0f, 0.0f, 1.0f);
+ Vector3 point3 = new Vector3(1.0f, 0.0f, 1.0f);
+
+ Plane target = Plane.CreateFromVertices(point1, point2, point3);
+ Plane expected = new Plane(new Vector3(0, 0, 1), -1.0f);
+ if (!target.Equals(expected))
+ {
+ returnVal = Fail;
+ }
+ return returnVal;
+ }
+
+ static int Main()
+ {
+ return PlaneCreateFromVerticesTest();
+ }
+}
diff --git a/tests/src/JIT/SIMD/Plane_r.csproj b/tests/src/JIT/SIMD/Plane_r.csproj
new file mode 100644
index 0000000000..543106d321
--- /dev/null
+++ b/tests/src/JIT/SIMD/Plane_r.csproj
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Plane.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
diff --git a/tests/src/JIT/SIMD/Plane_ro.csproj b/tests/src/JIT/SIMD/Plane_ro.csproj
new file mode 100644
index 0000000000..17563c97d1
--- /dev/null
+++ b/tests/src/JIT/SIMD/Plane_ro.csproj
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Plane.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>