Diffstat (limited to 'src/jit/morph.cpp')
-rw-r--r-- | src/jit/morph.cpp | 629 |
1 file changed, 300 insertions(+), 329 deletions(-)
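A recurring theme in the hunks below is that per-call stack bookkeeping moves into fgArgInfo: fgMorphArgs now records each call's outgoing-argument size via SetOutArgSize/SetStkSizeBytes instead of bumping the frame-wide lvaOutgoingArgSpaceSize as a side effect. A minimal sketch of that pattern follows; CallSummary and ComputeOutgoingArgSpace are illustrative names, not CoreCLR APIs, and the max/rounding rules simply mirror the MIN_ARG_AREA_FOR_CALL and localloc handling visible in the diff (stackAlign is assumed to be a power of two).

    #include <algorithm>
    #include <vector>

    // Hypothetical stand-in for the per-call fgArgInfo record.
    struct CallSummary
    {
        unsigned outArgSize; // bytes of outgoing-arg area this call needs
    };

    // Fold a frame-wide outgoing-arg-area size from per-call records, the way
    // lvaOutgoingArgSpaceSize can be derived after morph instead of being
    // updated in place at every call site.
    unsigned ComputeOutgoingArgSpace(const std::vector<CallSummary>& calls,
                                     unsigned minAreaForCall, // cf. MIN_ARG_AREA_FOR_CALL
                                     unsigned stackAlign,     // cf. STACK_ALIGN
                                     bool     locallocUsed)
    {
        unsigned size = 0;
        for (const CallSummary& c : calls)
        {
            // Mirrors SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL)).
            size = std::max(size, std::max(c.outArgSize, minAreaForCall));
        }
        if (locallocUsed)
        {
            // Keep the area stack-aligned so moving it past a localloc leaves no holes.
            size = (size + stackAlign - 1) & ~(stackAlign - 1);
        }
        return size;
    }

    int main()
    {
        std::vector<CallSummary> calls = {{8}, {24}, {0}};
        // max(24, 16) rounded up to a 16-byte boundary is 32.
        return ComputeOutgoingArgSpace(calls, 16, 16, true) == 32 ? 0 : 1;
    }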
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index dabca57710..92d5e0967e 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -92,7 +92,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
     tree->gtCall.gtEntryPoint.addr = nullptr;
 #endif
 
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
     if (varTypeIsLong(tree))
     {
         GenTreeCall* callNode = tree->AsCall();
@@ -101,7 +101,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
         retTypeDesc->InitializeLongReturnType(this);
         callNode->ClearOtherRegs();
     }
-#endif
+#endif // _TARGET_XXX_
 
     /* Perform the morphing */
 
@@ -850,17 +850,22 @@ void fgArgTabEntry::Dump()
 }
 #endif
 
-fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
 {
-    compiler = comp;
-    callTree = call;
-    assert(call->IsCall());
+    compiler    = comp;
+    callTree    = call;
     argCount    = 0; // filled in arg count, starts at zero
     nextSlotNum = INIT_ARG_STACK_SLOT;
     stkLevel    = 0;
 #if defined(UNIX_X86_ABI)
-    padStkAlign = 0;
+    alignmentDone = false;
+    stkSizeBytes  = 0;
+    padStkAlign   = 0;
 #endif
+#if FEATURE_FIXED_OUT_ARGS
+    outArgSize = 0;
+#endif
+
     argTableSize = numArgs; // the allocated table size
 
     hasRegArgs = false;
@@ -889,22 +894,22 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
 * in the argTable contains pointers that must point to the
 * new arguments and not the old arguments.
 */
-fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
 {
-    assert(oldCall->IsCall());
-    assert(newCall->IsCall());
-
     fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
 
-    compiler = oldArgInfo->compiler;
-    ;
-    callTree = newCall;
-    assert(newCall->IsCall());
+    compiler    = oldArgInfo->compiler;
+    callTree    = newCall;
     argCount    = 0; // filled in arg count, starts at zero
     nextSlotNum = INIT_ARG_STACK_SLOT;
     stkLevel    = oldArgInfo->stkLevel;
 #if defined(UNIX_X86_ABI)
-    padStkAlign = oldArgInfo->padStkAlign;
+    alignmentDone = oldArgInfo->alignmentDone;
+    stkSizeBytes  = oldArgInfo->stkSizeBytes;
+    padStkAlign   = oldArgInfo->padStkAlign;
+#endif
+#if FEATURE_FIXED_OUT_ARGS
+    outArgSize = oldArgInfo->outArgSize;
 #endif
     argTableSize = oldArgInfo->argTableSize;
     argsComplete = false;
@@ -924,22 +929,22 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
     // so we can iterate over these argument lists more uniformly.
     // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace them
     GenTreeArgList* newArgs;
-    GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+    GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
     GenTreeArgList* oldArgs;
-    GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+    GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
 
-    if (newCall->gtCall.gtCallObjp == nullptr)
+    if (newCall->gtCallObjp == nullptr)
     {
-        assert(oldCall->gtCall.gtCallObjp == nullptr);
-        newArgs = newCall->gtCall.gtCallArgs;
-        oldArgs = oldCall->gtCall.gtCallArgs;
+        assert(oldCall->gtCallObjp == nullptr);
+        newArgs = newCall->gtCallArgs;
+        oldArgs = oldCall->gtCallArgs;
     }
     else
     {
-        assert(oldCall->gtCall.gtCallObjp != nullptr);
-        newArgObjp.Current() = newCall->gtCall.gtCallArgs;
+        assert(oldCall->gtCallObjp != nullptr);
+        newArgObjp.Current() = newCall->gtCallArgs;
         newArgs = &newArgObjp;
-        oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+        oldArgObjp.Current() = oldCall->gtCallObjp;
         oldArgs = &oldArgObjp;
     }
 
@@ -1023,8 +1028,8 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
 
     if (scanRegArgs)
     {
-        newArgs = newCall->gtCall.gtCallLateArgs;
-        oldArgs = oldCall->gtCall.gtCallLateArgs;
+        newArgs = newCall->gtCallLateArgs;
+        oldArgs = oldCall->gtCallLateArgs;
 
         while (newArgs)
         {
@@ -1085,19 +1090,16 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
 {
     fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
 
-    curArgTabEntry->argNum = argNum;
-    curArgTabEntry->node = node;
-    curArgTabEntry->parent = parent;
-    curArgTabEntry->regNum = regNum;
-    curArgTabEntry->slotNum = 0;
-    curArgTabEntry->numRegs = numRegs;
-    curArgTabEntry->numSlots = 0;
-    curArgTabEntry->alignment = alignment;
-    curArgTabEntry->lateArgInx = (unsigned)-1;
-    curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
-    curArgTabEntry->padStkAlign = 0;
-#endif
+    curArgTabEntry->argNum     = argNum;
+    curArgTabEntry->node       = node;
+    curArgTabEntry->parent     = parent;
+    curArgTabEntry->regNum     = regNum;
+    curArgTabEntry->slotNum    = 0;
+    curArgTabEntry->numRegs    = numRegs;
+    curArgTabEntry->numSlots   = 0;
+    curArgTabEntry->alignment  = alignment;
+    curArgTabEntry->lateArgInx = (unsigned)-1;
+    curArgTabEntry->tmpNum     = (unsigned)-1;
     curArgTabEntry->isSplit = false;
     curArgTabEntry->isTmp = false;
     curArgTabEntry->needTmp = false;
@@ -1163,19 +1165,16 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
     curArgTabEntry->isStruct = isStruct; // is this a struct arg
 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
 
-    curArgTabEntry->argNum = argNum;
-    curArgTabEntry->node = node;
-    curArgTabEntry->parent = parent;
-    curArgTabEntry->regNum = REG_STK;
-    curArgTabEntry->slotNum = nextSlotNum;
-    curArgTabEntry->numRegs = 0;
-    curArgTabEntry->numSlots = numSlots;
-    curArgTabEntry->alignment = alignment;
-    curArgTabEntry->lateArgInx = (unsigned)-1;
-    curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
-    curArgTabEntry->padStkAlign = 0;
-#endif
+    curArgTabEntry->argNum     = argNum;
+    curArgTabEntry->node       = node;
+    curArgTabEntry->parent     = parent;
+    curArgTabEntry->regNum     = REG_STK;
+    curArgTabEntry->slotNum    = nextSlotNum;
+    curArgTabEntry->numRegs    = 0;
+    curArgTabEntry->numSlots   = numSlots;
+    curArgTabEntry->alignment  = alignment;
+    curArgTabEntry->lateArgInx = (unsigned)-1;
+    curArgTabEntry->tmpNum     = (unsigned)-1;
     curArgTabEntry->isSplit = false;
     curArgTabEntry->isTmp = false;
     curArgTabEntry->needTmp = false;
@@ -1701,52 +1700,6 @@ void fgArgInfo::ArgsComplete()
     argsComplete = true;
 }
 
-#if defined(UNIX_X86_ABI)
-// Get the stack alignment value for a Call holding this object
-//
-// NOTE: This function will calculate number of padding slots, to align the
-// stack before pushing arguments to the stack. Padding value is stored in
-// the first argument in fgArgTabEntry structure padStkAlign member so that
-// code (sub esp, n) can be emitted before generating argument push in
-// fgArgTabEntry node. As of result stack will be aligned right before
-// making a "Call". After the Call, stack is re-adjusted to the value it
-// was with fgArgInfo->padStkAlign value as we cann't use the one in fgArgTabEntry.
-//
-void fgArgInfo::ArgsAlignPadding()
-{
-    // To get the padding amount, sum up all the slots and get the remainder for padding
-    unsigned curInx;
-    unsigned numSlots = 0;
-    fgArgTabEntryPtr firstArgTabEntry = nullptr;
-
-    for (curInx = 0; curInx < argCount; curInx++)
-    {
-        fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
-        if (curArgTabEntry->numSlots > 0)
-        {
-            // The argument may be REG_STK or constant or register that goes to stack
-            assert(nextSlotNum >= curArgTabEntry->slotNum);
-
-            numSlots += curArgTabEntry->numSlots;
-            if (firstArgTabEntry == nullptr)
-            {
-                // First argument will be used to hold the padding amount
-                firstArgTabEntry = curArgTabEntry;
-            }
-        }
-    }
-
-    if (firstArgTabEntry != nullptr)
-    {
-        const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
-        // Set stack align pad for the first argument
-        firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
-        // Set also for fgArgInfo that will be used to reset stack pointer after the Call
-        this->padStkAlign = firstArgTabEntry->padStkAlign;
-    }
-}
-#endif // UNIX_X86_ABI
-
 void fgArgInfo::SortArgs()
 {
     assert(argsComplete == true);
@@ -2665,10 +2618,8 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE
 #pragma warning(push)
 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
 #endif
-GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 {
-    GenTreeCall* call = callNode->AsCall();
-
     GenTreePtr args;
     GenTreePtr argx;
 
@@ -2838,9 +2789,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
         // so we record the stack depth on the first morph call when reMorphing
        // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
        //
-        unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
         if (call->gtCallLateArgs != nullptr)
         {
+            unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
             fgPtrArgCntCur += callStkLevel;
             call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
             flagsSummary |= call->gtCallLateArgs->gtFlags;
@@ -2874,9 +2825,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
     CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(LEGACY_BACKEND)
-#if defined(_TARGET_X86_)
-    // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
-    // correctly here.
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+    // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
+    // Set the argument registers correctly here.
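    // (Aside: nonStandardArgs is morph's mechanism for pinning an argument to a
    // specific register outside the normal ABI assignment; the code below uses it
    // to force the helper's first argument into REG_PINVOKE_FRAME on both targets.)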
     if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
     {
         GenTreeArgList* args = call->gtCallArgs;
@@ -2884,6 +2835,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
         assert(arg1 != nullptr);
         nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
     }
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_)
     // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
     // hi part to be in EDX. This sets the argument registers up correctly.
     else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
@@ -4286,10 +4239,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
     {
         call->fgArgInfo->ArgsComplete();
 
-#if defined(UNIX_X86_ABI)
-        call->fgArgInfo->ArgsAlignPadding();
-#endif // UNIX_X86_ABI
-
 #ifdef LEGACY_BACKEND
         call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
 #if defined(_TARGET_ARM_)
@@ -4327,19 +4276,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
 
     if (fgPtrArgCntMax < fgPtrArgCntCur)
     {
+        JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
         fgPtrArgCntMax = fgPtrArgCntCur;
     }
 
+    assert(fgPtrArgCntCur >= genPtrArgCntSav);
+    call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
+
     /* The call will pop all the arguments we pushed */
 
     fgPtrArgCntCur = genPtrArgCntSav;
 
 #if FEATURE_FIXED_OUT_ARGS
 
-    // Update the outgoing argument size.
-    // If the call is a fast tail call, it will setup its arguments in incoming arg
-    // area instead of the out-going arg area. Therefore, don't consider fast tail
-    // calls to update lvaOutgoingArgSpaceSize.
+    // Record the outgoing argument size. If the call is a fast tail
+    // call, it will set up its arguments in the incoming arg area instead
+    // of the outgoing arg area, so we don't need to track the
+    // outgoing arg size.
     if (!call->IsFastTailCall())
     {
         unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
@@ -4359,26 +4312,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
         }
 #endif // UNIX_AMD64_ABI
 
-        // Check if we need to increase the size of our Outgoing Arg Space
-        if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
-        {
-            lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+        const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+        call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
 
-            // If a function has localloc, we will need to move the outgoing arg space when the
-            // localloc happens. When we do this, we need to maintain stack alignment. To avoid
-            // leaving alignment-related holes when doing this move, make sure the outgoing
-            // argument space size is a multiple of the stack alignment by aligning up to the next
-            // stack alignment boundary.
-            if (compLocallocUsed)
-            {
-                lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
-            }
-        }
 
 #ifdef DEBUG
         if (verbose)
         {
-            printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
-                   preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+            printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
+                   preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
         }
 #endif
     }
@@ -5047,7 +4988,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
     // replace the existing LDOBJ(ADDR(LCLVAR))
     // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
     //
-    unsigned offset = 0;
+    unsigned offset = baseOffset;
     GenTreeFieldList* listEntry = nullptr;
     for (unsigned inx = 0; inx < elemCount; inx++)
     {
@@ -6163,6 +6104,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
             return newTree;
         }
     }
+    else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
+    {
+        GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
+        if (lcl != nullptr)
+        {
+            lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
+        }
+    }
 #endif
 
     /* Is this an instance data member? */
@@ -6735,8 +6684,10 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
         printTreeID(fgMorphStmt);
         printf(" in BB%02u:\n", compCurBB->bbNum);
         gtDispTree(fgMorphStmt);
-
-        // printf("startVars=%d.\n", startVars);
+        if (call->IsImplicitTailCall())
+        {
+            printf("Note: candidate is implicit tail call\n");
+        }
     }
 #endif
 
@@ -7865,6 +7816,9 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
     // Either a call stmt or
     // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
     // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
+    // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
+    // In the above,
+    // GT_CASTS may be nested.
     genTreeOps stmtOper = stmtExpr->gtOper;
     if (stmtOper == GT_CALL)
     {
@@ -7872,24 +7826,31 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
     }
     else
     {
-        noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG);
+        noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
         GenTreePtr treeWithCall;
         if (stmtOper == GT_RETURN)
         {
             treeWithCall = stmtExpr->gtGetOp1();
         }
-        else
+        else if (stmtOper == GT_COMMA)
         {
-            treeWithCall = stmtExpr->gtGetOp2();
+            // Second operation must be nop.
+            noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
+            treeWithCall = stmtExpr->gtGetOp1();
         }
-        if (treeWithCall->gtOper == GT_CAST)
+        else
         {
-            noway_assert(treeWithCall->gtGetOp1() == call && !treeWithCall->gtOverflow());
+            treeWithCall = stmtExpr->gtGetOp2();
        }
-        else
+
+        // Peel off casts
+        while (treeWithCall->gtOper == GT_CAST)
        {
-            noway_assert(treeWithCall == call);
+            noway_assert(!treeWithCall->gtOverflow());
+            treeWithCall = treeWithCall->gtGetOp1();
        }
+
+        noway_assert(treeWithCall == call);
     }
 #endif
@@ -7909,10 +7870,11 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
         //  2) tail.call, nop*, pop, nop*, ret
         //  3) var=tail.call, nop*, ret(var)
         //  4) var=tail.call, nop*, pop, ret
+        //  5) comma(tail.call, nop), nop*, ret
         //
         // See impIsTailCallILPattern() for details on tail call IL patterns
         // that are supported.
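        // (Aside: pattern 5 is the new one. Its statement expression is a GT_COMMA
        //  whose second operand must be a nothing node, i.e. a tree of the shape
        //
        //        COMMA
        //        /   \
        //     CALL   NOP
        //
        //  which is why the check below can accept any statement operator other
        //  than GT_RETURN.)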
-        if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+        if (stmtExpr->gtOper != GT_RETURN)
         {
             // First delete all GT_NOPs after the call
             GenTreeStmt* morphStmtToRemove = nullptr;
@@ -7940,7 +7902,16 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
                 GenTreeStmt* popStmt = nextMorphStmt;
                 nextMorphStmt = nextMorphStmt->gtNextStmt;
 
-                noway_assert((popStmt->gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+                // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
+                // the constituent nodes.
+                GenTreePtr popExpr = popStmt->gtStmtExpr;
+                bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
+                if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
+                {
+                    isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
+                                       ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
+                }
+                noway_assert(isSideEffectFree);
                 fgRemoveStmt(compCurBB, popStmt);
             }
 
@@ -9658,6 +9629,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
             assert(dest->gtOper == GT_LCL_FLD);
             blockWidth = genTypeSize(dest->TypeGet());
             destAddr   = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+            destFldSeq = dest->AsLclFld()->gtFieldSeq;
         }
     }
     else
@@ -9779,12 +9751,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
     // Check to see if we are required to do a copy block because the struct contains holes
     // and either the src or dest is externally visible
     //
-    bool requiresCopyBlock  = false;
-    bool srcSingleLclVarAsg = false;
+    bool requiresCopyBlock   = false;
+    bool srcSingleLclVarAsg  = false;
+    bool destSingleLclVarAsg = false;
 
-    if ((destLclVar != nullptr) && (srcLclVar == destLclVar))
+    if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
     {
-        // Beyond perf reasons, it is not prudent to have a copy of a struct to itself.
+        // Self-assign; no effect.
         GenTree* nop = gtNewNothingNode();
         INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
         return nop;
@@ -9896,6 +9869,30 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
                 }
             }
         }
+        else
+        {
+            assert(srcDoFldAsg);
+            // Check for the symmetric case (which happens for the _pointer field of promoted spans):
+            //
+            //               [000240] -----+------             /--*  lclVar    struct(P) V18 tmp9
+            //                                                  /--*    byref  V18._value (offs=0x00) -> V30 tmp21
+            //               [000245] -A------R---             *  =         struct (copy)
+            //               [000244] -----+------             \--*  obj(8)    struct
+            //               [000243] -----+------                \--*  addr      byref
+            //               [000242] D----+-N----                   \--*  lclVar    byref  V28 tmp19
+            //
+            if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
+                (blockWidth == genTypeSize(destLclVar->TypeGet())))
+            {
+                // Check for type agreement
+                unsigned  fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
+                var_types srcType     = lvaTable[fieldLclNum].TypeGet();
+                if (destLclVar->TypeGet() == srcType)
+                {
+                    destSingleLclVarAsg = true;
+                }
+            }
+        }
     }
 
     // If we require a copy block then set both of the field assign bools to false
@@ -9912,7 +9909,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
     // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
     // or the struct is not promoted
    //
-    if (!destDoFldAsg && (destLclVar != nullptr))
+    if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
     {
         if (!destLclVar->lvRegStruct)
         {
@@ -10166,45 +10163,56 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
                 noway_assert(srcLclNum != BAD_VAR_NUM);
                 unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
 
-                if (addrSpill)
+                if (destSingleLclVarAsg)
                 {
-                    assert(addrSpillTemp != BAD_VAR_NUM);
-                    dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+                    noway_assert(fieldCnt == 1);
+                    noway_assert(destLclVar != nullptr);
+                    noway_assert(addrSpill == nullptr);
+
+                    dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
                 }
                 else
                 {
-                    dest = gtCloneExpr(destAddr);
-                    noway_assert(dest != nullptr);
-
-                    // Is the address of a local?
-                    GenTreeLclVarCommon* lclVarTree = nullptr;
-                    bool                 isEntire   = false;
-                    bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
-                    if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+                    if (addrSpill)
+                    {
+                        assert(addrSpillTemp != BAD_VAR_NUM);
+                        dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+                    }
+                    else
                     {
-                        lclVarTree->gtFlags |= GTF_VAR_DEF;
-                        if (!isEntire)
+                        dest = gtCloneExpr(destAddr);
+                        noway_assert(dest != nullptr);
+
+                        // Is the address of a local?
+                        GenTreeLclVarCommon* lclVarTree = nullptr;
+                        bool                 isEntire   = false;
+                        bool*                pIsEntire  = (blockWidthIsConst ? &isEntire : nullptr);
+                        if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
                         {
-                            lclVarTree->gtFlags |= GTF_VAR_USEASG;
+                            lclVarTree->gtFlags |= GTF_VAR_DEF;
+                            if (!isEntire)
+                            {
+                                lclVarTree->gtFlags |= GTF_VAR_USEASG;
+                            }
                         }
                     }
-                }
 
-                GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
-                // Have to set the field sequence -- which means we need the field handle.
-                CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
-                CORINFO_FIELD_HANDLE fieldHnd =
-                    info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
-                curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
-                fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+                    GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+                    // Have to set the field sequence -- which means we need the field handle.
+                    CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+                    CORINFO_FIELD_HANDLE fieldHnd =
+                        info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+                    curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+                    fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
 
-                dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+                    dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
 
-                dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+                    dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
 
-                // !!! The destination could be on stack. !!!
-                // This flag will let us choose the correct write barrier.
-                dest->gtFlags |= GTF_IND_TGTANYWHERE;
+                    // !!! The destination could be on stack. !!!
+                    // This flag will let us choose the correct write barrier.
+                    dest->gtFlags |= GTF_IND_TGTANYWHERE;
+                }
             }
 
             if (srcDoFldAsg)
@@ -10849,7 +10857,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
                     op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
                     op1->gtFlags &= ~GTF_ALL_EFFECT;
                     op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
-                    op1->gtFlags |= GTF_DONT_CSE;
                 }
 
                 if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
@@ -10857,9 +10864,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
                     op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
                     op2->gtFlags &= ~GTF_ALL_EFFECT;
                     op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
-                    op2->gtFlags |= GTF_DONT_CSE;
                 }
 
+                op1->gtFlags |= GTF_DONT_CSE;
+                op2->gtFlags |= GTF_DONT_CSE;
+
                 tree->gtFlags &= ~GTF_ALL_EFFECT;
                 tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
 
@@ -11178,11 +11187,13 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
                 GenTreePtr pGetType;
 
 #ifdef LEGACY_BACKEND
-                bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
-                bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+                bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
+                bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
 #else
-                bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
-                bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+                bool bOp1ClassFromHandle =
+                    op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
+                bool bOp2ClassFromHandle =
+                    op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
 #endif
 
                 // Optimize typeof(...) == typeof(...)
@@ -12244,6 +12255,23 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
                    }
                }
            }
+            else // we have an unsigned comparison
+            {
+                if (op2->IsIntegralConst(0))
+                {
+                    if ((oper == GT_GT) || (oper == GT_LE))
+                    {
+                        // IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
+                        // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
+                        // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
+                        // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it
+                        // sometimes occurs as a result of branch inversion.
+                        oper = (oper == GT_LE) ? GT_EQ : GT_NE;
+                        tree->SetOper(oper, GenTree::PRESERVE_VN);
+                        tree->gtFlags &= ~GTF_UNSIGNED;
+                    }
+                }
+            }
 
         COMPARE:
 
@@ -14157,13 +14185,13 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
     //
    //                 OR                   ROL
    //               /    \                /   \
-   //             LSH    RSZ      ->     x     y
+   //             LSH    RSZ      ->     x     y
    //            /  \    /  \
-   //           x   AND  x  AND
+   //           x   AND  x  AND
    //              /  \     /  \
-   //             y   31   ADD  31
+   //             y   31   ADD  31
    //                      /  \
-   //                    NEG   32
+   //                    NEG   32
    //                      |
    //                      y
    // The patterns recognized:
@@ -14534,7 +14562,10 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
         tree = fgMorphIntoHelperCall(tree, helper, args);
 
         if (fgPtrArgCntMax < fgPtrArgCntCur)
+        {
+            JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
             fgPtrArgCntMax = fgPtrArgCntCur;
+        }
 
         fgPtrArgCntCur -= argc;
         return tree;
@@ -15090,13 +15121,13 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
                 // else if bTaken has valid profile weight and block does not we try to adjust block's weight
                 // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
                 //
-                if (block->bbFlags & BBF_PROF_WEIGHT)
+                if (block->hasProfileWeight())
                 {
                     // The edge weights for (block -> bTaken) are 100% of block's weight
                     edgeTaken->flEdgeWeightMin = block->bbWeight;
                     edgeTaken->flEdgeWeightMax = block->bbWeight;
 
-                    if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+                    if (!bTaken->hasProfileWeight())
                     {
                         if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
                         {
@@ -15106,7 +15137,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
                         }
                     }
                 }
-                else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+                else if (bTaken->hasProfileWeight())
                 {
                     if (bTaken->countOfInEdges() == 1)
                     {
@@ -16171,7 +16202,9 @@ void Compiler::fgSetOptions()
     // to use a frame pointer because of EH. But until all the code uses
     // the same test, leave info.compXcptnsCount here.
     if (info.compXcptnsCount > 0)
+    {
         codeGen->setFramePointerRequiredEH(true);
+    }
 
 #else // !_TARGET_X86_
 
@@ -16182,6 +16215,15 @@ void Compiler::fgSetOptions()
 
 #endif // _TARGET_X86_
 
+#ifdef UNIX_X86_ABI
+    if (info.compXcptnsCount > 0)
+    {
+        assert(!codeGen->isGCTypeFixed());
+        // Enforce fully interruptible codegen for funclet unwinding
+        genInterruptible = true;
+    }
+#endif // UNIX_X86_ABI
+
     fgCheckArgCnt();
 
     if (info.compCallUnmanaged)
@@ -16250,6 +16292,15 @@ GenTreePtr Compiler::fgInitThisClass()
             CORINFO_RESOLVED_TOKEN resolvedToken;
             memset(&resolvedToken, 0, sizeof(resolvedToken));
 
+            // We are in a shared method body, but maybe we don't need a runtime lookup after all.
+            // This covers the case of a generic method on a non-generic type.
+            if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
+            {
+                resolvedToken.hClass = info.compClassHnd;
+                return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
+            }
+
+            // We need a runtime lookup.
             GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
 
             // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
@@ -16263,7 +16314,7 @@ GenTreePtr Compiler::fgInitThisClass()
     // Collectible types require that for shared generic code, if we use the generic context parameter
     // that we report it. (This is a conservative approach, we could detect some cases particularly when the
    // context parameter is this that we don't need the eager reporting logic.)
-    lvaGenericsContextUsed = true;
+    lvaGenericsContextUseCount++;
 
     switch (kind.runtimeLookupKind)
     {
@@ -16952,6 +17003,10 @@ void Compiler::fgMorph()
 
     EndPhase(PHASE_EMPTY_FINALLY);
 
+    fgMergeFinallyChains();
+
+    EndPhase(PHASE_MERGE_FINALLY_CHAINS);
+
     fgCloneFinally();
 
     EndPhase(PHASE_CLONE_FINALLY);
@@ -17072,13 +17127,11 @@ void Compiler::fgPromoteStructs()
 #endif // DEBUG
 
     // The lvaTable might grow as we grab temps. Make a local copy here.
-
     unsigned startLvaCount = lvaCount;
 
     //
     // Loop through the original lvaTable. Looking for struct locals to be promoted.
     //
-
     lvaStructPromotionInfo structPromotionInfo;
     bool                   tooManyLocals = false;
 
@@ -17088,13 +17141,14 @@ void Compiler::fgPromoteStructs()
         bool       promotedVar = false;
         LclVarDsc* varDsc      = &lvaTable[lclNum];
 
+        // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+        // its fields.  Instead, we will attempt to enregister the entire struct.
         if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
         {
-            // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
-            // its fields.  Instead, we will attempt to enregister the entire struct.
             varDsc->lvRegStruct = true;
         }
-        else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit.
+        // Don't promote if we have reached the tracking limit.
+        else if (lvaHaveManyLocals())
         {
             // Print the message first time when we detected this condition
             if (!tooManyLocals)
@@ -17103,159 +17157,56 @@ void Compiler::fgPromoteStructs()
             }
             tooManyLocals = true;
         }
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
-        else if (varDsc->lvIsMultiRegArg)
-        {
-            JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
-        }
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-        else if (varDsc->lvIsMultiRegRet)
-        {
-            JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
-        }
         else if (varTypeIsStruct(varDsc))
         {
-            lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
-            bool canPromote = structPromotionInfo.canPromote;
-
-            // We start off with shouldPromote same as canPromote.
-            // Based on further profitablity checks done below, shouldPromote
-            // could be set to false.
-            bool shouldPromote = canPromote;
-
-            if (canPromote)
-            {
-                // We *can* promote; *should* we promote?
-                // We should only do so if promotion has potential savings.  One source of savings
-                // is if a field of the struct is accessed, since this access will be turned into
-                // an access of the corresponding promoted field variable.  Even if there are no
-                // field accesses, but only block-level operations on the whole struct, if the struct
-                // has only one or two fields, then doing those block operations field-wise is probably faster
-                // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
-                // Struct promotion also provides the following benefits: reduce stack frame size,
-                // reduce the need for zero init of stack frame and fine grained constant/copy prop.
-                // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
-                // So if no fields are accessed independently, and there are four or more fields,
-                // then do not promote.
-                //
-                // TODO: Ideally we would want to consider the impact of whether the struct is
-                // passed as a parameter or assigned the return value of a call. Because once promoted,
-                // struct copying is done by field by field assignment instead of a more efficient
-                // rep.stos or xmm reg based copy.
-                if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
-                {
-                    JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
-                            structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
-                    shouldPromote = false;
-                }
-#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
-                // TODO-PERF - Only do this when the LclVar is used in an argument context
-                // TODO-ARM64 - HFA support should also eliminate the need for this.
-                // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
-                //
-                // For now we currently don't promote structs with a single float field
-                // Promoting it can cause us to shuffle it back and forth between the int and
-                // the float regs when it is used as a argument, which is very expensive for XARCH
-                //
-                else if ((structPromotionInfo.fieldCnt == 1) &&
-                         varTypeIsFloating(structPromotionInfo.fields[0].fldType))
-                {
-                    JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
-                            "single float field.\n",
-                            lclNum, structPromotionInfo.fieldCnt);
-                    shouldPromote = false;
-                }
-#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+            bool shouldPromote;
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
-#if defined(_TARGET_ARM64_)
-                //
-                // For now we currently don't promote structs that are passed in registers
-                //
-                else if (lvaIsMultiregStruct(varDsc))
-                {
-                    JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
-                            lvaLclExactSize(lclNum));
-                    shouldPromote = false;
-                }
-#endif // _TARGET_ARM64_
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-                else if (varDsc->lvIsParam)
-                {
-#if FEATURE_MULTIREG_STRUCT_PROMOTE
-                    if (lvaIsMultiregStruct(
-                            varDsc) && // Is this a variable holding a value that is passed in multiple registers?
-                        (structPromotionInfo.fieldCnt != 2)) // Does it have exactly two fields
-                    {
-                        JITDUMP(
-                            "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
-                            lclNum);
-                        shouldPromote = false;
-                    }
-                    else
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-
-                        // TODO-PERF - Implement struct promotion for incoming multireg structs
-                        //             Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
-
-                        if (structPromotionInfo.fieldCnt != 1)
-                    {
-                        JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
-                                "%d.\n",
-                                lclNum, structPromotionInfo.fieldCnt);
-                        shouldPromote = false;
-                    }
-                }
-
-                //
-                // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
-                // the the incoming register into the stack frame slot.
-                // In that case, we would like to avoid promortion.
-                // However we haven't yet computed the lvRefCnt values so we can't do that.
-                //
-                CLANG_FORMAT_COMMENT_ANCHOR;
+            lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+            if (structPromotionInfo.canPromote)
+            {
+                shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
+            }
+            else
+            {
+                shouldPromote = false;
+            }
 
 #if 0
-            // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
-            // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
-            static int structPromoVarNum = 0;
-            structPromoVarNum++;
-            if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+            // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+            // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+            static int structPromoVarNum = 0;
+            structPromoVarNum++;
+            if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
 #endif // 0
 
-                if (shouldPromote)
-                {
-                    assert(canPromote);
-
-                    // Promote the this struct local var.
-                    lvaPromoteStructVar(lclNum, &structPromotionInfo);
-                    promotedVar = true;
+            if (shouldPromote)
+            {
+                // Promote this struct local var.
+                lvaPromoteStructVar(lclNum, &structPromotionInfo);
+                promotedVar = true;
 
 #ifdef _TARGET_ARM_
-                    if (structPromotionInfo.requiresScratchVar)
+                if (structPromotionInfo.requiresScratchVar)
+                {
+                    // Ensure that the scratch variable is allocated, in case we
+                    // pass a promoted struct as an argument.
+                    if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
                     {
-                        // Ensure that the scratch variable is allocated, in case we
-                        // pass a promoted struct as an argument.
-                        if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
-                        {
-                            lvaPromotedStructAssemblyScratchVar =
-                                lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
-                            lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
-                        }
+                        lvaPromotedStructAssemblyScratchVar =
+                            lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+                        lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
                     }
-#endif // _TARGET_ARM_
                 }
+#endif // _TARGET_ARM_
             }
         }
 
-#ifdef FEATURE_SIMD
-        if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+        if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
         {
             // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
             // we will treat it as a reg struct.
             varDsc->lvRegStruct = true;
         }
-#endif // FEATURE_SIMD
     }
 
 #ifdef DEBUG
@@ -17298,10 +17249,30 @@ Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData*
             tree->gtFlags &= ~GTF_GLOB_REF;
 
             GenTreePtr parent = fgWalkPre->parentStack->Index(1);
-            if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+            if (parent->gtOper == GT_ASG)
             {
-                tree->gtFlags |= GTF_VAR_DEF;
-                tree->gtFlags |= GTF_DONT_CSE;
+                if (parent->gtOp.gtOp1 == tree)
+                {
+                    tree->gtFlags |= GTF_VAR_DEF;
+                    tree->gtFlags |= GTF_DONT_CSE;
+                }
+
+                // Promotion of a struct containing struct fields, where the inner
+                // struct has a single pointer-sized scalar field: in this case struct
+                // promotion uses the type of the underlying scalar field as the type
+                // of the struct field, instead of recursively promoting. This can lead
+                // to a case where we have a block-asg with its RHS replaced with a
+                // scalar type. Mark the RHS value as DONT_CSE so that assertion prop
+                // will not do constant propagation on it: if the RHS of a block-asg is
+                // a constant, it is incorrectly interpreted as an init-block.
+                //
+                // TODO - This can also be avoided if we implement recursive struct
+                // promotion.
+                if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
+                {
+                    tree->gtFlags |= GTF_DONT_CSE;
+                }
            }
 #ifdef DEBUG
            if (verbose)
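One transform above deserves a concrete check: the fgMorphSmpOp hunk rewrites unsigned comparisons against zero into equality tests. The identities it relies on are easy to verify; this standalone snippet (illustrative only, not CoreCLR code) asserts both of them:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const uint32_t samples[] = {0u, 1u, 2u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
        for (uint32_t x : samples)
        {
            assert((x > 0u) == (x != 0u));  // x GT_GT.unsigned 0  ==>  x GT_NE 0
            assert((x <= 0u) == (x == 0u)); // x GT_LE.unsigned 0  ==>  x GT_EQ 0
        }
        return 0;
    }

Both hold for every unsigned x because 0 is the minimum of the unsigned range, so "greater than zero" and "not equal to zero" coincide, as do "at most zero" and "equal to zero".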