summaryrefslogtreecommitdiff
path: root/src/jit/morph.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/jit/morph.cpp')
-rw-r--r--src/jit/morph.cpp567
1 files changed, 320 insertions, 247 deletions
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 678bb34c54..dabca57710 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
compiler = comp;
callTree = call;
assert(call->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = 0;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = 0;
+#endif
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
@@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
;
callTree = newCall;
assert(newCall->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = oldArgInfo->stkLevel;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = oldArgInfo->padStkAlign;
+#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
argTable = nullptr;
@@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1689,6 +1701,52 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}
+#if defined(UNIX_X86_ABI)
+// Get the stack alignment value for a Call holding this object
+//
+// NOTE: This function calculates the number of padding slots needed to align the
+// stack before pushing arguments to the stack. The padding value is stored in
+// the padStkAlign member of the first argument's fgArgTabEntry structure so that
+// code (sub esp, n) can be emitted before generating the argument push for that
+// fgArgTabEntry node. As a result, the stack will be aligned right before
+// making a "Call". After the Call, the stack is re-adjusted to the value it
+// had by using the fgArgInfo->padStkAlign value, as we cannot use the one in fgArgTabEntry.
+//
+void fgArgInfo::ArgsAlignPadding()
+{
+ // To get the padding amount, sum up all the slots and get the remainder for padding
+ unsigned curInx;
+ unsigned numSlots = 0;
+ fgArgTabEntryPtr firstArgTabEntry = nullptr;
+
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ if (curArgTabEntry->numSlots > 0)
+ {
+ // The argument may be REG_STK or constant or register that goes to stack
+ assert(nextSlotNum >= curArgTabEntry->slotNum);
+
+ numSlots += curArgTabEntry->numSlots;
+ if (firstArgTabEntry == nullptr)
+ {
+ // First argument will be used to hold the padding amount
+ firstArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+
+ if (firstArgTabEntry != nullptr)
+ {
+ const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
+ // Set stack align pad for the first argument
+ firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
+ // Set also for fgArgInfo that will be used to reset stack pointer after the Call
+ this->padStkAlign = firstArgTabEntry->padStkAlign;
+ }
+}
+#endif // UNIX_X86_ABI
+
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
@@ -2431,6 +2489,22 @@ void fgArgInfo::EvalArgsToTemps()
#endif
}
+// Get the late arg for arg at position argIndex.
+// argIndex - 0-based position to get late arg for.
+// Caller must ensure this position has a late arg.
+GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
+{
+ for (unsigned j = 0; j < this->ArgCount(); j++)
+ {
+ if (this->ArgTable()[j]->argNum == argIndex)
+ {
+ return this->ArgTable()[j]->node;
+ }
+ }
+ // Caller must ensure late arg exists.
+ unreached();
+}
+
void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
assert(!IsUninitialized(stkLvl));
@@ -4211,6 +4285,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!reMorphing)
{
call->fgArgInfo->ArgsComplete();
+
+#if defined(UNIX_X86_ABI)
+ call->fgArgInfo->ArgsAlignPadding();
+#endif // UNIX_X86_ABI
+
#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
@@ -5629,8 +5708,13 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// to ensure that the same values are used in the bounds check and the actual
// dereference.
// Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
+ // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
+ // complexity is not exposed. (Without that condition there are cases of local struct
+ // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
+ // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
//
- if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
+ if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
+ gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
{
unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
@@ -5649,7 +5733,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// dereference.
// Also we allocate the temporary when the index is sufficiently complex/expensive.
//
- if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
+ if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
+ (arrRef->OperGet() == GT_FIELD))
{
unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
indexDefn = gtNewTempAssign(indexTmpNum, index);
@@ -5683,7 +5768,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
}
GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
- GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
bndsChk = arrBndsChk;
@@ -6051,14 +6136,15 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
{
assert(tree->gtOper == GT_FIELD);
- noway_assert(tree->gtFlags & GTF_GLOB_REF);
-
CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
unsigned fldOffset = tree->gtField.gtFldOffset;
GenTreePtr objRef = tree->gtField.gtFldObj;
bool fieldMayOverlap = false;
bool objIsLocal = false;
+ noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
+ ((tree->gtFlags & GTF_GLOB_REF) != 0));
+
if (tree->gtField.gtFldMayOverlap)
{
fieldMayOverlap = true;
@@ -6067,8 +6153,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
}
#ifdef FEATURE_SIMD
- // if this field belongs to simd struct, tranlate it to simd instrinsic.
- if (mac == nullptr || mac->m_kind != MACK_Addr)
+ // if this field belongs to simd struct, translate it to simd intrinsic.
+ if (mac == nullptr)
{
GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
if (newTree != tree)
@@ -6077,13 +6163,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
return newTree;
}
}
- else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
- {
- // We have a field of an SIMD intrinsic in an address-taken context.
- // We need to copy the SIMD result to a temp, and take the field of that.
- GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
- objRef->gtOp.gtOp1 = copy;
- }
#endif
/* Is this an instance data member? */
@@ -6468,6 +6547,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
addr->gtIntCon.gtFieldSeq = fieldSeq;
tree->SetOper(GT_IND);
+ // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
+ // We must clear it when we transform the node.
+ // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
+ // that the logic above does its own checking to determine whether a nullcheck is needed.
+ tree->gtFlags &= ~GTF_IND_ARR_LEN;
tree->gtOp.gtOp1 = addr;
return fgMorphSmpOp(tree);
@@ -6507,6 +6591,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
}
}
noway_assert(tree->gtOper == GT_IND);
+ // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
+ // We must clear it when we transform the node.
+ // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
+ // that the logic above does its own checking to determine whether a nullcheck is needed.
+ tree->gtFlags &= ~GTF_IND_ARR_LEN;
GenTreePtr res = fgMorphSmpOp(tree);
@@ -8467,7 +8556,7 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
// The SIMD type in question could be Vector2f which is 8-bytes in size.
// The below check is to make sure that we don't turn that copyblk
// into a assignment, since rationalizer logic will transform the
- // copyblk apropriately. Otherwise, the transormation made in this
+ // copyblk appropriately. Otherwise, the transformation made in this
// routine will prevent rationalizer logic and we might end up with
// GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
// in codegen.
@@ -8495,6 +8584,12 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
}
else
{
+ // Is this an enregisterable struct that is already a simple assignment?
+ // This can happen if we are re-morphing.
+ if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
+ {
+ return tree;
+ }
noway_assert(dest->OperIsLocal());
lclVarTree = dest;
destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
@@ -9185,7 +9280,7 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
// Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
// TODO-1stClassStructs: Consider whether this can be improved.
// Also consider whether some of this can be included in gtNewBlockVal (though note
- // that doing so may cause us to query the type system before we otherwise would.
+ // that doing so may cause us to query the type system before we otherwise would).
GenTree* lastComma = nullptr;
for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
{
@@ -10223,31 +10318,46 @@ _Done:
// FP architectures
GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- if (tree->OperIsArithmetic() && varTypeIsFloating(tree))
+ if (tree->OperIsArithmetic())
{
- if (op1->TypeGet() != tree->TypeGet())
- {
- tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
- }
- if (op2->TypeGet() != tree->TypeGet())
+ if (varTypeIsFloating(tree))
{
- tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (op1->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
+ }
+ if (op2->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
+ }
}
}
- else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet())
+ else if (tree->OperIsCompare())
{
- // both had better be floating, just one bigger than other
- assert(varTypeIsFloating(op2));
- if (op1->TypeGet() == TYP_FLOAT)
- {
- tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
- }
- else if (op2->TypeGet() == TYP_FLOAT)
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ if (varTypeIsFloating(op1))
{
- tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
+ GenTreePtr op2 = tree->gtGetOp2();
+ assert(varTypeIsFloating(op2));
+
+ if (op1->TypeGet() != op2->TypeGet())
+ {
+ // both had better be floating, just one bigger than other
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ assert(op2->TypeGet() == TYP_DOUBLE);
+ tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ assert(op1->TypeGet() == TYP_DOUBLE);
+ tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ }
}
}
@@ -10323,50 +10433,6 @@ GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
#ifdef FEATURE_SIMD
-//--------------------------------------------------------------------------------------
-// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
-//
-// Arguments:
-// simdNode - The GenTreeSIMD node to be copied
-//
-// Return Value:
-// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
-//
-GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
-{
- // Copy the result of the SIMD intrinsic into a temp.
- unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
-
- CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
- // We only have fields of the fixed float vectors.
- noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
- switch (simdNode->gtSIMDSize)
- {
- case 8:
- simdHandle = SIMDVector2Handle;
- break;
- case 12:
- simdHandle = SIMDVector3Handle;
- break;
- case 16:
- simdHandle = SIMDVector4Handle;
- break;
- default:
- noway_assert(!"field of unexpected SIMD type");
- break;
- }
- assert(simdHandle != NO_CLASS_HANDLE);
-
- lvaSetStruct(lclNum, simdHandle, false, true);
- lvaTable[lclNum].lvFieldAccessed = true;
-
- GenTree* asg = gtNewTempAssign(lclNum, simdNode);
- GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
-
- GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
- return comma;
-}
-
//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
// Checking whether the field belongs to a simd struct or not. If it is, return the GenTreePtr for
@@ -10449,12 +10515,12 @@ GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
}
/*****************************************************************************
-* If a read operation tries to access simd struct field, then transform the this
-* operation to to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
+* If a read operation tries to access simd struct field, then transform the
+* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
* Otherwise, return the old tree.
* Argument:
* tree - GenTreePtr. If this pointer points to simd struct which is used for simd
-* intrinsic. We will morph it as simd intrinsic SIMDIntrinsicGetItem.
+* intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem.
* Return:
* A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
* return nullptr.
@@ -10468,7 +10534,6 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
if (simdStructNode != nullptr)
{
-
assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
GenTree* op2 = gtNewIconNode(index);
tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
@@ -10481,11 +10546,11 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
/*****************************************************************************
* Transform an assignment of a SIMD struct field to SIMD intrinsic
-* SIMDIntrinsicGetItem, and return a new tree. If If it is not such an assignment,
+* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
* then return the old tree.
* Argument:
* tree - GenTreePtr. If this pointer points to simd struct which is used for simd
-* intrinsic. We will morph it as simd intrinsic set.
+* intrinsic, we will morph it as simd intrinsic set.
* Return:
* A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
* return nullptr.
@@ -10538,7 +10603,8 @@ GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
return tree;
}
-#endif
+#endif // FEATURE_SIMD
+
/*****************************************************************************
*
* Transform the given GTK_SMPOP tree for code generation.
@@ -10584,7 +10650,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/*-------------------------------------------------------------------------
* First do any PRE-ORDER processing
@@ -10998,6 +11064,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// a % b = a - (a / b) * b;
//
+ // NOTE: we should never need to perform this transformation when remorphing, since global morphing
+ // should already have done so and we do not introduce new modulus nodes in later phases.
+ assert(!optValnumCSE_phase);
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
@@ -11010,7 +11079,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// the redundant division. If there's no redundant division then
// nothing is lost, lowering would have done this transform anyway.
- if ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())
+ if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
{
ssize_t divisorValue = op2->AsIntCon()->IconValue();
size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
@@ -11206,7 +11275,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
fgMorphRecognizeBoxNullable(tree);
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
break;
@@ -11297,12 +11366,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// comma list. The left arg (op1) gets a fresh context.
subMac1 = nullptr;
break;
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- subMac1 = &subIndMac1;
- }
- break;
case GT_OBJ:
case GT_BLK:
case GT_DYN_BLK:
@@ -11440,12 +11503,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
}
break;
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- mac = &subIndMac2;
- }
- break;
default:
break;
}
@@ -11611,7 +11668,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
/* gtFoldExpr could have changed op1 and op2 */
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
// Do we have an integer compare operation?
//
@@ -13508,12 +13565,10 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
/* and also "a = x <op> a" into "a <op>= x" for communative ops */
CLANG_FORMAT_COMMENT_ANCHOR;
-#if !LONG_ASG_OPS
if (typ == TYP_LONG)
{
break;
}
-#endif
if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
{
@@ -13669,25 +13724,9 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
-
-#if LONG_ASG_OPS
-
- if (typ == TYP_LONG)
- break;
-#endif
-
case GT_OR:
case GT_XOR:
case GT_AND:
-
-#if LONG_ASG_OPS
-
- /* TODO: allow non-const long assignment operators */
-
- if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
- break;
-#endif
-
ASG_OP:
{
bool bReverse = false;
@@ -14048,11 +14087,21 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
{
numerator = fgMakeMultiUse(&tree->gtOp1);
}
+ else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
+ {
+ // Morphing introduces new lclVar references. Increase ref counts
+ lvaIncRefCnts(numerator);
+ }
if (!denominator->OperIsLeaf())
{
denominator = fgMakeMultiUse(&tree->gtOp2);
}
+ else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
+ {
+ // Morphing introduces new lclVar references. Increase ref counts
+ lvaIncRefCnts(denominator);
+ }
// The numerator and denominator may have been assigned to temps, in which case
// their defining assignments are in the current tree. Therefore, we need to
@@ -14335,7 +14384,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/*
We have to use helper calls for all FP operations:
@@ -14683,8 +14732,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
fgSetRngChkTarget(tree);
GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
- bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
+ bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
// If the index is a comma(throw, x), just return that.
if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
{
@@ -14692,8 +14741,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
}
// Propagate effects flags upwards
- bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
+ bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
// Otherwise, we don't change the tree.
}
@@ -15972,7 +16021,6 @@ void Compiler::fgMorphBlocks()
// genReturnLocal
noway_assert(ret->OperGet() == GT_RETURN);
noway_assert(ret->gtGetOp1() != nullptr);
- noway_assert(ret->gtGetOp2() == nullptr);
GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
@@ -15991,7 +16039,6 @@ void Compiler::fgMorphBlocks()
// Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
noway_assert(ret->TypeGet() == TYP_VOID);
noway_assert(ret->gtGetOp1() == nullptr);
- noway_assert(ret->gtGetOp2() == nullptr);
fgRemoveStmt(block, last);
}
@@ -16897,6 +16944,20 @@ void Compiler::fgMorph()
fgDebugCheckBBlist(false, false);
#endif // DEBUG
+ fgRemoveEmptyTry();
+
+ EndPhase(PHASE_EMPTY_TRY);
+
+ fgRemoveEmptyFinally();
+
+ EndPhase(PHASE_EMPTY_FINALLY);
+
+ fgCloneFinally();
+
+ EndPhase(PHASE_CLONE_FINALLY);
+
+ fgUpdateFinallyTargetFlags();
+
/* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
fgMarkImplicitByRefArgs();
@@ -17002,6 +17063,14 @@ void Compiler::fgPromoteStructs()
return;
}
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable before fgPromoteStructs\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
+
// The lvaTable might grow as we grab temps. Make a local copy here.
unsigned startLvaCount = lvaCount;
@@ -17019,17 +17088,13 @@ void Compiler::fgPromoteStructs()
bool promotedVar = false;
LclVarDsc* varDsc = &lvaTable[lclNum];
-#ifdef FEATURE_SIMD
- if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
{
// If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
// its fields. Instead, we will attempt to enregister the entire struct.
varDsc->lvRegStruct = true;
}
- else
-#endif // FEATURE_SIMD
- // Don't promote if we have reached the tracking limit.
- if (lvaHaveManyLocals())
+ else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit.
{
// Print the message first time when we detected this condition
if (!tooManyLocals)
@@ -17060,7 +17125,6 @@ void Compiler::fgPromoteStructs()
if (canPromote)
{
-
// We *can* promote; *should* we promote?
// We should only do so if promotion has potential savings. One source of savings
// is if a field of the struct is accessed, since this access will be turned into
@@ -17068,9 +17132,17 @@ void Compiler::fgPromoteStructs()
// field accesses, but only block-level operations on the whole struct, if the struct
// has only one or two fields, then doing those block operations field-wise is probably faster
// than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
- // So if no fields are accessed independently, and there are three or more fields,
+ // Struct promotion also provides the following benefits: reduce stack frame size,
+ // reduce the need for zero init of stack frame and fine grained constant/copy prop.
+ // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
+ // So if no fields are accessed independently, and there are four or more fields,
// then do not promote.
- if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ //
+ // TODO: Ideally we would want to consider the impact of whether the struct is
+ // passed as a parameter or assigned the return value of a call, because once promoted,
+ // struct copying is done by field-by-field assignment instead of a more efficient
+ // rep.stos or xmm reg based copy.
+ if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
{
JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
@@ -17185,114 +17257,115 @@ void Compiler::fgPromoteStructs()
}
#endif // FEATURE_SIMD
}
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable after fgPromoteStructs\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
}
Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
noway_assert(tree->OperGet() == GT_FIELD);
- noway_assert(tree->gtFlags & GTF_GLOB_REF);
GenTreePtr objRef = tree->gtField.gtFldObj;
+ GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
+ noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
/* Is this an instance data member? */
- if (objRef)
+ if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
{
- if (objRef->gtOper == GT_ADDR)
- {
- GenTreePtr obj = objRef->gtOp.gtOp1;
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
- if (obj->gtOper == GT_LCL_VAR)
+ if (varTypeIsStruct(obj))
+ {
+ if (varDsc->lvPromoted)
{
- unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &lvaTable[lclNum];
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
- if (varTypeIsStruct(obj))
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
- if (varDsc->lvPromoted)
- {
- // Promoted struct
- unsigned fldOffset = tree->gtField.gtFldOffset;
- unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
- noway_assert(fieldLclIndex != BAD_VAR_NUM);
-
- tree->SetOper(GT_LCL_VAR);
- tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
- tree->gtType = lvaTable[fieldLclIndex].TypeGet();
- tree->gtFlags &= GTF_NODE_MASK;
- tree->gtFlags &= ~GTF_GLOB_REF;
-
- GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
- {
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
- }
-#ifdef DEBUG
- if (verbose)
- {
- printf("Replacing the field in promoted struct with a local var:\n");
- fgWalkPre->printModified = true;
- }
-#endif // DEBUG
- return WALK_SKIP_SUBTREES;
- }
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
}
- else
+#ifdef DEBUG
+ if (verbose)
{
- // Normed struct
- // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
- // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
- // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
- // there is one extremely rare case where that won't be true. An enum type is a special value type
- // that contains exactly one element of a primitive integer type (that, for CLS programs is named
- // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
- // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
- // ldfld. For example:
- //
- // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
- // {
- // .field public specialname rtspecialname int16 value__
- // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
- // }
- // .method public hidebysig static void Main() cil managed
- // {
- // .locals init (valuetype mynamespace.e_t V_0)
- // ...
- // ldloca.s V_0
- // ldflda int16 mynamespace.e_t::value__
- // ...
- // }
- //
- // Normally, compilers will not generate the ldflda, since it is superfluous.
- //
- // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
- // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
- // mismatch like this, don't do this morphing. The local var may end up getting marked as
- // address taken, and the appropriate SHORT load will be done from memory in that case.
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (that, for CLS programs is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
+ //
+ // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
+ // {
+ // .field public specialname rtspecialname int16 value__
+ // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
+ // }
+ // .method public hidebysig static void Main() cil managed
+ // {
+ // .locals init (valuetype mynamespace.e_t V_0)
+ // ...
+ // ldloca.s V_0
+ // ldflda int16 mynamespace.e_t::value__
+ // ...
+ // }
+ //
+ // Normally, compilers will not generate the ldflda, since it is superfluous.
+ //
+ // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
+ // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
+ // mismatch like this, don't do this morphing. The local var may end up getting marked as
+ // address taken, and the appropriate SHORT load will be done from memory in that case.
- if (tree->TypeGet() == obj->TypeGet())
- {
- tree->ChangeOper(GT_LCL_VAR);
- tree->gtLclVarCommon.SetLclNum(lclNum);
- tree->gtFlags &= GTF_NODE_MASK;
+ if (tree->TypeGet() == obj->TypeGet())
+ {
+ tree->ChangeOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(lclNum);
+ tree->gtFlags &= GTF_NODE_MASK;
- GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
- {
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
- }
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
#ifdef DEBUG
- if (verbose)
- {
- printf("Replacing the field in normed struct with the local var:\n");
- fgWalkPre->printModified = true;
- }
-#endif // DEBUG
- return WALK_SKIP_SUBTREES;
- }
+ if (verbose)
+ {
+ printf("Replacing the field in normed struct with the local var:\n");
+ fgWalkPre->printModified = true;
}
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
}
}
}