diff options
Diffstat (limited to 'src/jit/morph.cpp')
-rw-r--r-- | src/jit/morph.cpp | 567 |
1 files changed, 320 insertions, 247 deletions
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 678bb34c54..dabca57710 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs) compiler = comp; callTree = call; assert(call->IsCall()); - argCount = 0; // filled in arg count, starts at zero - nextSlotNum = INIT_ARG_STACK_SLOT; - stkLevel = 0; + argCount = 0; // filled in arg count, starts at zero + nextSlotNum = INIT_ARG_STACK_SLOT; + stkLevel = 0; +#if defined(UNIX_X86_ABI) + padStkAlign = 0; +#endif argTableSize = numArgs; // the allocated table size hasRegArgs = false; @@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall) ; callTree = newCall; assert(newCall->IsCall()); - argCount = 0; // filled in arg count, starts at zero - nextSlotNum = INIT_ARG_STACK_SLOT; - stkLevel = oldArgInfo->stkLevel; + argCount = 0; // filled in arg count, starts at zero + nextSlotNum = INIT_ARG_STACK_SLOT; + stkLevel = oldArgInfo->stkLevel; +#if defined(UNIX_X86_ABI) + padStkAlign = oldArgInfo->padStkAlign; +#endif argTableSize = oldArgInfo->argTableSize; argsComplete = false; argTable = nullptr; @@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg( { fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = regNum; - curArgTabEntry->slotNum = 0; - curArgTabEntry->numRegs = numRegs; - curArgTabEntry->numSlots = 0; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = regNum; + curArgTabEntry->slotNum = 0; + curArgTabEntry->numRegs = numRegs; + curArgTabEntry->numSlots = 0; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; +#if defined(UNIX_X86_ABI) + curArgTabEntry->padStkAlign = 0; +#endif curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->isStruct = isStruct; // is this a struct arg #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = REG_STK; - curArgTabEntry->slotNum = nextSlotNum; - curArgTabEntry->numRegs = 0; - curArgTabEntry->numSlots = numSlots; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = REG_STK; + curArgTabEntry->slotNum = nextSlotNum; + curArgTabEntry->numRegs = 0; + curArgTabEntry->numSlots = numSlots; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; +#if defined(UNIX_X86_ABI) + curArgTabEntry->padStkAlign = 0; +#endif curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1689,6 +1701,52 @@ void fgArgInfo::ArgsComplete() argsComplete = true; } +#if defined(UNIX_X86_ABI) +// Get the stack alignment value for a Call holding this object +// +// NOTE: This function will calculate number of padding slots, to align the +// stack before pushing arguments to the stack. Padding value is stored in +// the first argument in fgArgTabEntry structure padStkAlign member so that +// code (sub esp, n) can be emitted before generating argument push in +// fgArgTabEntry node. As of result stack will be aligned right before +// making a "Call". After the Call, stack is re-adjusted to the value it +// was with fgArgInfo->padStkAlign value as we cann't use the one in fgArgTabEntry. +// +void fgArgInfo::ArgsAlignPadding() +{ + // To get the padding amount, sum up all the slots and get the remainder for padding + unsigned curInx; + unsigned numSlots = 0; + fgArgTabEntryPtr firstArgTabEntry = nullptr; + + for (curInx = 0; curInx < argCount; curInx++) + { + fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; + if (curArgTabEntry->numSlots > 0) + { + // The argument may be REG_STK or constant or register that goes to stack + assert(nextSlotNum >= curArgTabEntry->slotNum); + + numSlots += curArgTabEntry->numSlots; + if (firstArgTabEntry == nullptr) + { + // First argument will be used to hold the padding amount + firstArgTabEntry = curArgTabEntry; + } + } + } + + if (firstArgTabEntry != nullptr) + { + const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE; + // Set stack align pad for the first argument + firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned); + // Set also for fgArgInfo that will be used to reset stack pointer after the Call + this->padStkAlign = firstArgTabEntry->padStkAlign; + } +} +#endif // UNIX_X86_ABI + void fgArgInfo::SortArgs() { assert(argsComplete == true); @@ -2431,6 +2489,22 @@ void fgArgInfo::EvalArgsToTemps() #endif } +// Get the late arg for arg at position argIndex. +// argIndex - 0-based position to get late arg for. +// Caller must ensure this position has a late arg. +GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex) +{ + for (unsigned j = 0; j < this->ArgCount(); j++) + { + if (this->ArgTable()[j]->argNum == argIndex) + { + return this->ArgTable()[j]->node; + } + } + // Caller must ensure late arg exists. + unreached(); +} + void fgArgInfo::RecordStkLevel(unsigned stkLvl) { assert(!IsUninitialized(stkLvl)); @@ -4211,6 +4285,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (!reMorphing) { call->fgArgInfo->ArgsComplete(); + +#if defined(UNIX_X86_ABI) + call->fgArgInfo->ArgsAlignPadding(); +#endif // UNIX_X86_ABI + #ifdef LEGACY_BACKEND call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum); #if defined(_TARGET_ARM_) @@ -5629,8 +5708,13 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) // to ensure that the same values are used in the bounds check and the actual // dereference. // Also we allocate the temporary when the arrRef is sufficiently complex/expensive. + // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true + // complexity is not exposed. (Without that condition there are cases of local struct + // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was + // fixed, there were some regressions that were mostly ameliorated by adding this condition.) // - if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY)) + if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || + gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD)) { unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef); @@ -5649,7 +5733,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) // dereference. // Also we allocate the temporary when the index is sufficiently complex/expensive. // - if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY)) + if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) || + (arrRef->OperGet() == GT_FIELD)) { unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr")); indexDefn = gtNewTempAssign(indexTmpNum, index); @@ -5683,7 +5768,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) } GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK) - GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL); + GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL); bndsChk = arrBndsChk; @@ -6051,14 +6136,15 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) { assert(tree->gtOper == GT_FIELD); - noway_assert(tree->gtFlags & GTF_GLOB_REF); - CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd; unsigned fldOffset = tree->gtField.gtFldOffset; GenTreePtr objRef = tree->gtField.gtFldObj; bool fieldMayOverlap = false; bool objIsLocal = false; + noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || + ((tree->gtFlags & GTF_GLOB_REF) != 0)); + if (tree->gtField.gtFldMayOverlap) { fieldMayOverlap = true; @@ -6067,8 +6153,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) } #ifdef FEATURE_SIMD - // if this field belongs to simd struct, tranlate it to simd instrinsic. - if (mac == nullptr || mac->m_kind != MACK_Addr) + // if this field belongs to simd struct, translate it to simd instrinsic. + if (mac == nullptr) { GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree); if (newTree != tree) @@ -6077,13 +6163,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) return newTree; } } - else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD()) - { - // We have a field of an SIMD intrinsic in an address-taken context. - // We need to copy the SIMD result to a temp, and take the field of that. - GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD()); - objRef->gtOp.gtOp1 = copy; - } #endif /* Is this an instance data member? */ @@ -6468,6 +6547,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) addr->gtIntCon.gtFieldSeq = fieldSeq; tree->SetOper(GT_IND); + // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN. + // We must clear it when we transform the node. + // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note + // that the logic above does its own checking to determine whether a nullcheck is needed. + tree->gtFlags &= ~GTF_IND_ARR_LEN; tree->gtOp.gtOp1 = addr; return fgMorphSmpOp(tree); @@ -6507,6 +6591,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) } } noway_assert(tree->gtOper == GT_IND); + // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN. + // We must clear it when we transform the node. + // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note + // that the logic above does its own checking to determine whether a nullcheck is needed. + tree->gtFlags &= ~GTF_IND_ARR_LEN; GenTreePtr res = fgMorphSmpOp(tree); @@ -8467,7 +8556,7 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree) // The SIMD type in question could be Vector2f which is 8-bytes in size. // The below check is to make sure that we don't turn that copyblk // into a assignment, since rationalizer logic will transform the - // copyblk apropriately. Otherwise, the transormation made in this + // copyblk appropriately. Otherwise, the transformation made in this // routine will prevent rationalizer logic and we might end up with // GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert // in codegen. @@ -8495,6 +8584,12 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree) } else { + // Is this an enregisterable struct that is already a simple assignment? + // This can happen if we are re-morphing. + if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock) + { + return tree; + } noway_assert(dest->OperIsLocal()); lclVarTree = dest; destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum; @@ -9185,7 +9280,7 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest) // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind. // TODO-1stClassStructs: Consider whether this can be improved. // Also consider whether some of this can be included in gtNewBlockVal (though note - // that doing so may cause us to query the type system before we otherwise would. + // that doing so may cause us to query the type system before we otherwise would). GenTree* lastComma = nullptr; for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2()) { @@ -10223,31 +10318,46 @@ _Done: // FP architectures GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree) { - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtGetOp2(); - - if (tree->OperIsArithmetic() && varTypeIsFloating(tree)) + if (tree->OperIsArithmetic()) { - if (op1->TypeGet() != tree->TypeGet()) - { - tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet()); - } - if (op2->TypeGet() != tree->TypeGet()) + if (varTypeIsFloating(tree)) { - tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet()); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + if (op1->TypeGet() != tree->TypeGet()) + { + tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet()); + } + if (op2->TypeGet() != tree->TypeGet()) + { + tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet()); + } } } - else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet()) + else if (tree->OperIsCompare()) { - // both had better be floating, just one bigger than other - assert(varTypeIsFloating(op2)); - if (op1->TypeGet() == TYP_FLOAT) - { - tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE); - } - else if (op2->TypeGet() == TYP_FLOAT) + GenTreePtr op1 = tree->gtOp.gtOp1; + + if (varTypeIsFloating(op1)) { - tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE); + GenTreePtr op2 = tree->gtGetOp2(); + assert(varTypeIsFloating(op2)); + + if (op1->TypeGet() != op2->TypeGet()) + { + // both had better be floating, just one bigger than other + if (op1->TypeGet() == TYP_FLOAT) + { + assert(op2->TypeGet() == TYP_DOUBLE); + tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE); + } + else if (op2->TypeGet() == TYP_FLOAT) + { + assert(op1->TypeGet() == TYP_DOUBLE); + tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE); + } + } } } @@ -10323,50 +10433,6 @@ GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare) #ifdef FEATURE_SIMD -//-------------------------------------------------------------------------------------- -// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed. -// -// Arguments: -// simdNode - The GenTreeSIMD node to be copied -// -// Return Value: -// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar. -// -GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode) -{ - // Copy the result of the SIMD intrinsic into a temp. - unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access")); - - CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE; - // We only have fields of the fixed float vectors. - noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT); - switch (simdNode->gtSIMDSize) - { - case 8: - simdHandle = SIMDVector2Handle; - break; - case 12: - simdHandle = SIMDVector3Handle; - break; - case 16: - simdHandle = SIMDVector4Handle; - break; - default: - noway_assert(!"field of unexpected SIMD type"); - break; - } - assert(simdHandle != NO_CLASS_HANDLE); - - lvaSetStruct(lclNum, simdHandle, false, true); - lvaTable[lclNum].lvFieldAccessed = true; - - GenTree* asg = gtNewTempAssign(lclNum, simdNode); - GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET); - - GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode); - return comma; -} - //-------------------------------------------------------------------------------------------------------------- // getSIMDStructFromField: // Checking whether the field belongs to a simd struct or not. If it is, return the GenTreePtr for @@ -10449,12 +10515,12 @@ GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree, } /***************************************************************************** -* If a read operation tries to access simd struct field, then transform the this -* operation to to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. +* If a read operation tries to access simd struct field, then transform the +* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree. * Otherwise, return the old tree. * Argument: * tree - GenTreePtr. If this pointer points to simd struct which is used for simd -* intrinsic. We will morph it as simd intrinsic SIMDIntrinsicGetItem. +* intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem. * Return: * A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic, * return nullptr. @@ -10468,7 +10534,6 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree) GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize); if (simdStructNode != nullptr) { - assert(simdSize >= ((index + 1) * genTypeSize(baseType))); GenTree* op2 = gtNewIconNode(index); tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize); @@ -10481,11 +10546,11 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree) /***************************************************************************** * Transform an assignment of a SIMD struct field to SIMD intrinsic -* SIMDIntrinsicGetItem, and return a new tree. If If it is not such an assignment, +* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment, * then return the old tree. * Argument: * tree - GenTreePtr. If this pointer points to simd struct which is used for simd -* intrinsic. We will morph it as simd intrinsic set. +* intrinsic, we will morph it as simd intrinsic set. * Return: * A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic, * return nullptr. @@ -10538,7 +10603,8 @@ GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree) return tree; } -#endif +#endif // FEATURE_SIMD + /***************************************************************************** * * Transform the given GTK_SMPOP tree for code generation. @@ -10584,7 +10650,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) genTreeOps oper = tree->OperGet(); var_types typ = tree->TypeGet(); GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtGetOp2(); + GenTreePtr op2 = tree->gtGetOp2IfPresent(); /*------------------------------------------------------------------------- * First do any PRE-ORDER processing @@ -10998,6 +11064,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // a % b = a - (a / b) * b; // + // NOTE: we should never need to perform this transformation when remorphing, since global morphing + // should already have done so and we do not introduce new modulus nodes in later phases. + assert(!optValnumCSE_phase); tree = fgMorphModToSubMulDiv(tree->AsOp()); op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; @@ -11010,7 +11079,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // the redundant division. If there's no redundant division then // nothing is lost, lowering would have done this transform anyway. - if ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()) + if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())) { ssize_t divisorValue = op2->AsIntCon()->IconValue(); size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) @@ -11206,7 +11275,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) } fgMorphRecognizeBoxNullable(tree); op1 = tree->gtOp.gtOp1; - op2 = tree->gtGetOp2(); + op2 = tree->gtGetOp2IfPresent(); break; @@ -11297,12 +11366,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // comma list. The left arg (op1) gets a fresh context. subMac1 = nullptr; break; - case GT_ASG: - if (tree->OperIsBlkOp()) - { - subMac1 = &subIndMac1; - } - break; case GT_OBJ: case GT_BLK: case GT_DYN_BLK: @@ -11440,12 +11503,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) } } break; - case GT_ASG: - if (tree->OperIsBlkOp()) - { - mac = &subIndMac2; - } - break; default: break; } @@ -11611,7 +11668,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) /* gtFoldExpr could have changed op1 and op2 */ op1 = tree->gtOp.gtOp1; - op2 = tree->gtGetOp2(); + op2 = tree->gtGetOp2IfPresent(); // Do we have an integer compare operation? // @@ -13508,12 +13565,10 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) /* and also "a = x <op> a" into "a <op>= x" for communative ops */ CLANG_FORMAT_COMMENT_ANCHOR; -#if !LONG_ASG_OPS if (typ == TYP_LONG) { break; } -#endif if (varTypeIsStruct(typ) && !tree->IsPhiDefn()) { @@ -13669,25 +13724,9 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) case GT_LSH: case GT_RSH: case GT_RSZ: - -#if LONG_ASG_OPS - - if (typ == TYP_LONG) - break; -#endif - case GT_OR: case GT_XOR: case GT_AND: - -#if LONG_ASG_OPS - - /* TODO: allow non-const long assignment operators */ - - if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG) - break; -#endif - ASG_OP: { bool bReverse = false; @@ -14048,11 +14087,21 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) { numerator = fgMakeMultiUse(&tree->gtOp1); } + else if (lvaLocalVarRefCounted && numerator->OperIsLocal()) + { + // Morphing introduces new lclVar references. Increase ref counts + lvaIncRefCnts(numerator); + } if (!denominator->OperIsLeaf()) { denominator = fgMakeMultiUse(&tree->gtOp2); } + else if (lvaLocalVarRefCounted && denominator->OperIsLocal()) + { + // Morphing introduces new lclVar references. Increase ref counts + lvaIncRefCnts(denominator); + } // The numerator and denominator may have been assigned to temps, in which case // their defining assignments are in the current tree. Therefore, we need to @@ -14335,7 +14384,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree) genTreeOps oper = tree->OperGet(); var_types typ = tree->TypeGet(); GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtGetOp2(); + GenTreePtr op2 = tree->gtGetOp2IfPresent(); /* We have to use helper calls for all FP operations: @@ -14683,8 +14732,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac) fgSetRngChkTarget(tree); GenTreeBoundsChk* bndsChk = tree->AsBoundsChk(); - bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex); + bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen); // If the index is a comma(throw, x), just return that. if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex)) { @@ -14692,8 +14741,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac) } // Propagate effects flags upwards - bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT); + bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT); // Otherwise, we don't change the tree. } @@ -15972,7 +16021,6 @@ void Compiler::fgMorphBlocks() // genReturnLocal noway_assert(ret->OperGet() == GT_RETURN); noway_assert(ret->gtGetOp1() != nullptr); - noway_assert(ret->gtGetOp2() == nullptr); GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1()); @@ -15991,7 +16039,6 @@ void Compiler::fgMorphBlocks() // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block noway_assert(ret->TypeGet() == TYP_VOID); noway_assert(ret->gtGetOp1() == nullptr); - noway_assert(ret->gtGetOp2() == nullptr); fgRemoveStmt(block, last); } @@ -16897,6 +16944,20 @@ void Compiler::fgMorph() fgDebugCheckBBlist(false, false); #endif // DEBUG + fgRemoveEmptyTry(); + + EndPhase(PHASE_EMPTY_TRY); + + fgRemoveEmptyFinally(); + + EndPhase(PHASE_EMPTY_FINALLY); + + fgCloneFinally(); + + EndPhase(PHASE_CLONE_FINALLY); + + fgUpdateFinallyTargetFlags(); + /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */ fgMarkImplicitByRefArgs(); @@ -17002,6 +17063,14 @@ void Compiler::fgPromoteStructs() return; } +#ifdef DEBUG + if (verbose) + { + printf("\nlvaTable before fgPromoteStructs\n"); + lvaTableDump(); + } +#endif // DEBUG + // The lvaTable might grow as we grab temps. Make a local copy here. unsigned startLvaCount = lvaCount; @@ -17019,17 +17088,13 @@ void Compiler::fgPromoteStructs() bool promotedVar = false; LclVarDsc* varDsc = &lvaTable[lclNum]; -#ifdef FEATURE_SIMD - if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic) + if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic()) { // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote // its fields. Instead, we will attempt to enregister the entire struct. varDsc->lvRegStruct = true; } - else -#endif // FEATURE_SIMD - // Don't promote if we have reached the tracking limit. - if (lvaHaveManyLocals()) + else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit. { // Print the message first time when we detected this condition if (!tooManyLocals) @@ -17060,7 +17125,6 @@ void Compiler::fgPromoteStructs() if (canPromote) { - // We *can* promote; *should* we promote? // We should only do so if promotion has potential savings. One source of savings // is if a field of the struct is accessed, since this access will be turned into @@ -17068,9 +17132,17 @@ void Compiler::fgPromoteStructs() // field accesses, but only block-level operations on the whole struct, if the struct // has only one or two fields, then doing those block operations field-wise is probably faster // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86). - // So if no fields are accessed independently, and there are three or more fields, + // Struct promotion also provides the following benefits: reduce stack frame size, + // reduce the need for zero init of stack frame and fine grained constant/copy prop. + // Asm diffs indicate that promoting structs up to 3 fields is a net size win. + // So if no fields are accessed independently, and there are four or more fields, // then do not promote. - if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed) + // + // TODO: Ideally we would want to consider the impact of whether the struct is + // passed as a parameter or assigned the return value of a call. Because once promoted, + // struct copying is done by field by field assignment instead of a more efficient + // rep.stos or xmm reg based copy. + if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed) { JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed); @@ -17185,114 +17257,115 @@ void Compiler::fgPromoteStructs() } #endif // FEATURE_SIMD } + +#ifdef DEBUG + if (verbose) + { + printf("\nlvaTable after fgPromoteStructs\n"); + lvaTableDump(); + } +#endif // DEBUG } Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre) { noway_assert(tree->OperGet() == GT_FIELD); - noway_assert(tree->gtFlags & GTF_GLOB_REF); GenTreePtr objRef = tree->gtField.gtFldObj; + GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr; + noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))); /* Is this an instance data member? */ - if (objRef) + if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)) { - if (objRef->gtOper == GT_ADDR) - { - GenTreePtr obj = objRef->gtOp.gtOp1; + unsigned lclNum = obj->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &lvaTable[lclNum]; - if (obj->gtOper == GT_LCL_VAR) + if (varTypeIsStruct(obj)) + { + if (varDsc->lvPromoted) { - unsigned lclNum = obj->gtLclVarCommon.gtLclNum; - LclVarDsc* varDsc = &lvaTable[lclNum]; + // Promoted struct + unsigned fldOffset = tree->gtField.gtFldOffset; + unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); + noway_assert(fieldLclIndex != BAD_VAR_NUM); + + tree->SetOper(GT_LCL_VAR); + tree->gtLclVarCommon.SetLclNum(fieldLclIndex); + tree->gtType = lvaTable[fieldLclIndex].TypeGet(); + tree->gtFlags &= GTF_NODE_MASK; + tree->gtFlags &= ~GTF_GLOB_REF; - if (varTypeIsStruct(obj)) + GenTreePtr parent = fgWalkPre->parentStack->Index(1); + if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) { - if (varDsc->lvPromoted) - { - // Promoted struct - unsigned fldOffset = tree->gtField.gtFldOffset; - unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset); - noway_assert(fieldLclIndex != BAD_VAR_NUM); - - tree->SetOper(GT_LCL_VAR); - tree->gtLclVarCommon.SetLclNum(fieldLclIndex); - tree->gtType = lvaTable[fieldLclIndex].TypeGet(); - tree->gtFlags &= GTF_NODE_MASK; - tree->gtFlags &= ~GTF_GLOB_REF; - - GenTreePtr parent = fgWalkPre->parentStack->Index(1); - if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) - { - tree->gtFlags |= GTF_VAR_DEF; - tree->gtFlags |= GTF_DONT_CSE; - } -#ifdef DEBUG - if (verbose) - { - printf("Replacing the field in promoted struct with a local var:\n"); - fgWalkPre->printModified = true; - } -#endif // DEBUG - return WALK_SKIP_SUBTREES; - } + tree->gtFlags |= GTF_VAR_DEF; + tree->gtFlags |= GTF_DONT_CSE; } - else +#ifdef DEBUG + if (verbose) { - // Normed struct - // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if - // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 - // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, - // there is one extremely rare case where that won't be true. An enum type is a special value type - // that contains exactly one element of a primitive integer type (that, for CLS programs is named - // "value__"). The VM tells us that a local var of that enum type is the primitive type of the - // enum's single field. It turns out that it is legal for IL to access this field using ldflda or - // ldfld. For example: - // - // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum - // { - // .field public specialname rtspecialname int16 value__ - // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) - // } - // .method public hidebysig static void Main() cil managed - // { - // .locals init (valuetype mynamespace.e_t V_0) - // ... - // ldloca.s V_0 - // ldflda int16 mynamespace.e_t::value__ - // ... - // } - // - // Normally, compilers will not generate the ldflda, since it is superfluous. - // - // In the example, the lclVar is short, but the JIT promotes all trees using this local to the - // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type - // mismatch like this, don't do this morphing. The local var may end up getting marked as - // address taken, and the appropriate SHORT load will be done from memory in that case. + printf("Replacing the field in promoted struct with a local var:\n"); + fgWalkPre->printModified = true; + } +#endif // DEBUG + return WALK_SKIP_SUBTREES; + } + } + else + { + // Normed struct + // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if + // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8 + // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However, + // there is one extremely rare case where that won't be true. An enum type is a special value type + // that contains exactly one element of a primitive integer type (that, for CLS programs is named + // "value__"). The VM tells us that a local var of that enum type is the primitive type of the + // enum's single field. It turns out that it is legal for IL to access this field using ldflda or + // ldfld. For example: + // + // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum + // { + // .field public specialname rtspecialname int16 value__ + // .field public static literal valuetype mynamespace.e_t one = int16(0x0000) + // } + // .method public hidebysig static void Main() cil managed + // { + // .locals init (valuetype mynamespace.e_t V_0) + // ... + // ldloca.s V_0 + // ldflda int16 mynamespace.e_t::value__ + // ... + // } + // + // Normally, compilers will not generate the ldflda, since it is superfluous. + // + // In the example, the lclVar is short, but the JIT promotes all trees using this local to the + // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type + // mismatch like this, don't do this morphing. The local var may end up getting marked as + // address taken, and the appropriate SHORT load will be done from memory in that case. - if (tree->TypeGet() == obj->TypeGet()) - { - tree->ChangeOper(GT_LCL_VAR); - tree->gtLclVarCommon.SetLclNum(lclNum); - tree->gtFlags &= GTF_NODE_MASK; + if (tree->TypeGet() == obj->TypeGet()) + { + tree->ChangeOper(GT_LCL_VAR); + tree->gtLclVarCommon.SetLclNum(lclNum); + tree->gtFlags &= GTF_NODE_MASK; - GenTreePtr parent = fgWalkPre->parentStack->Index(1); - if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) - { - tree->gtFlags |= GTF_VAR_DEF; - tree->gtFlags |= GTF_DONT_CSE; - } + GenTreePtr parent = fgWalkPre->parentStack->Index(1); + if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree)) + { + tree->gtFlags |= GTF_VAR_DEF; + tree->gtFlags |= GTF_DONT_CSE; + } #ifdef DEBUG - if (verbose) - { - printf("Replacing the field in normed struct with the local var:\n"); - fgWalkPre->printModified = true; - } -#endif // DEBUG - return WALK_SKIP_SUBTREES; - } + if (verbose) + { + printf("Replacing the field in normed struct with the local var:\n"); + fgWalkPre->printModified = true; } +#endif // DEBUG + return WALK_SKIP_SUBTREES; } } } |