Diffstat (limited to 'src/jit')
35 files changed, 1811 insertions, 1426 deletions
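Most of the churn in this diff comes from one refactoring: code generation used to pull temporary registers out of a node's gtRsvdRegs mask by hand with genFindLowestBit/genRegNumFromMask, and now asks the node itself via the GenTree temp-register helpers. The sketch below contrasts the two patterns as a reading aid; the helper names and their optional register-mask filter (ExtractTempReg, GetSingleTempReg, AvailableTempRegCount) are inferred from the call sites in the hunks that follow, since the gentree.h side of the change is not shown in this section.

    // Old pattern: peel registers off the reserved mask by hand.
    regMaskTP tmpRegsMask = tree->gtRsvdRegs;
    regMaskTP tmpRegMask  = genFindLowestBit(tmpRegsMask); // one-bit mask
    regNumber tmpReg1     = genRegNumFromMask(tmpRegMask);
    tmpRegsMask &= ~tmpRegMask;                            // remove the bit for tmpReg1
    regNumber tmpReg2     = genRegNumFromMask(tmpRegsMask);

    // New pattern: the node hands out its reserved registers itself
    // (and can assert internally if callers take too few or too many).
    regNumber tmpReg1 = tree->ExtractTempReg();           // take one temp reg, leaving the rest
    regNumber tmpReg2 = tree->GetSingleTempReg();         // take the sole remaining temp reg
    regNumber intReg  = tree->ExtractTempReg(RBM_ALLINT); // restrict the choice with a mask

Separately, block.h/block.cpp split NumSucc/GetSucc into paired overloads with and without a Compiler*; as the updated block.h comment explains, callers must always use a matching pair.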
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
index 6d8bc348fd..8e5dc2999f 100644
--- a/src/jit/block.cpp
+++ b/src/jit/block.cpp
@@ -16,6 +16,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #pragma hdrstop
 #endif
 
+#if MEASURE_BLOCK_SIZE
+/* static */
+size_t BasicBlock::s_Size;
+/* static */
+size_t BasicBlock::s_Count;
+#endif // MEASURE_BLOCK_SIZE
+
+#ifdef DEBUG
+// The max # of tree nodes in any BB
+/* static */
+unsigned BasicBlock::s_nMaxTrees;
+#endif // DEBUG
+
 #ifdef DEBUG
 flowList* ShuffleHelper(unsigned hash, flowList* res)
 {
@@ -804,3 +817,552 @@ bool BasicBlock::isEmpty()
 
     return true;
 }
+
+GenTreeStmt* BasicBlock::FirstNonPhiDef()
+{
+    GenTreePtr stmt = bbTreeList;
+    if (stmt == nullptr)
+    {
+        return nullptr;
+    }
+    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+    while ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_PHI) ||
+           (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_PHI))
+    {
+        stmt = stmt->gtNext;
+        if (stmt == nullptr)
+        {
+            return nullptr;
+        }
+        tree = stmt->gtStmt.gtStmtExpr;
+    }
+    return stmt->AsStmt();
+}
+
+GenTreePtr BasicBlock::FirstNonPhiDefOrCatchArgAsg()
+{
+    GenTreePtr stmt = FirstNonPhiDef();
+    if (stmt == nullptr)
+    {
+        return nullptr;
+    }
+    GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+    if ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_CATCH_ARG) ||
+        (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_CATCH_ARG))
+    {
+        stmt = stmt->gtNext;
+    }
+    return stmt;
+}
+
+/*****************************************************************************
+ *
+ *  Mark a block as rarely run. We also don't want to have a loop in a
+ *  rarely run block, so we set its weight to zero.
+ */
+
+void BasicBlock::bbSetRunRarely()
+{
+    setBBWeight(BB_ZERO_WEIGHT);
+    if (bbWeight == BB_ZERO_WEIGHT)
+    {
+        bbFlags |= BBF_RUN_RARELY; // This block is never/rarely run
+    }
+}
+
+/*****************************************************************************
+ *
+ *  Can a BasicBlock be inserted after this one without altering the flowgraph?
+ */
+
+bool BasicBlock::bbFallsThrough()
+{
+    switch (bbJumpKind)
+    {
+
+        case BBJ_THROW:
+        case BBJ_EHFINALLYRET:
+        case BBJ_EHFILTERRET:
+        case BBJ_EHCATCHRET:
+        case BBJ_RETURN:
+        case BBJ_ALWAYS:
+        case BBJ_LEAVE:
+        case BBJ_SWITCH:
+            return false;
+
+        case BBJ_NONE:
+        case BBJ_COND:
+            return true;
+
+        case BBJ_CALLFINALLY:
+            return ((bbFlags & BBF_RETLESS_CALL) == 0);
+
+        default:
+            assert(!"Unknown bbJumpKind in bbFallsThrough()");
+            return true;
+    }
+}
+
+//------------------------------------------------------------------------
+// NumSucc: Returns the count of block successors. See the declaration comment for details.
+//
+// Arguments:
+//    None.
+//
+// Return Value:
+//    Count of block successors.
+//
+unsigned BasicBlock::NumSucc()
+{
+    switch (bbJumpKind)
+    {
+        case BBJ_THROW:
+        case BBJ_RETURN:
+        case BBJ_EHFINALLYRET:
+        case BBJ_EHFILTERRET:
+            return 0;
+
+        case BBJ_CALLFINALLY:
+        case BBJ_ALWAYS:
+        case BBJ_EHCATCHRET:
+        case BBJ_LEAVE:
+        case BBJ_NONE:
+            return 1;
+
+        case BBJ_COND:
+            if (bbJumpDest == bbNext)
+            {
+                return 1;
+            }
+            else
+            {
+                return 2;
+            }
+
+        case BBJ_SWITCH:
+            return bbJumpSwt->bbsCount;
+
+        default:
+            unreached();
+    }
+}
+
+//------------------------------------------------------------------------
+// GetSucc: Returns the requested block successor. See the declaration comment for details.
+//
+// Arguments:
+//    i - index of successor to return. 0 <= i < NumSucc().
+//
+// Return Value:
+//    Requested successor block
+//
+BasicBlock* BasicBlock::GetSucc(unsigned i)
+{
+    assert(i < NumSucc()); // Index bounds check.
+    switch (bbJumpKind)
+    {
+        case BBJ_CALLFINALLY:
+        case BBJ_ALWAYS:
+        case BBJ_EHCATCHRET:
+        case BBJ_LEAVE:
+            return bbJumpDest;
+
+        case BBJ_NONE:
+            return bbNext;
+
+        case BBJ_COND:
+            if (i == 0)
+            {
+                return bbNext;
+            }
+            else
+            {
+                assert(i == 1);
+                return bbJumpDest;
+            }
+
+        case BBJ_SWITCH:
+            return bbJumpSwt->bbsDstTab[i];
+
+        default:
+            unreached();
+    }
+}
+
+//------------------------------------------------------------------------
+// NumSucc: Returns the count of block successors. See the declaration comment for details.
+//
+// Arguments:
+//    comp - Compiler instance
+//
+// Return Value:
+//    Count of block successors.
+//
+unsigned BasicBlock::NumSucc(Compiler* comp)
+{
+    assert(comp != nullptr);
+
+    switch (bbJumpKind)
+    {
+        case BBJ_THROW:
+        case BBJ_RETURN:
+            return 0;
+
+        case BBJ_EHFINALLYRET:
+        {
+            // The first block of the handler is labelled with the catch type.
+            BasicBlock* hndBeg = comp->fgFirstBlockOfHandler(this);
+            if (hndBeg->bbCatchTyp == BBCT_FINALLY)
+            {
+                return comp->fgNSuccsOfFinallyRet(this);
+            }
+            else
+            {
+                assert(hndBeg->bbCatchTyp == BBCT_FAULT); // We can only BBJ_EHFINALLYRET from FINALLY and FAULT.
+                // A FAULT block has no successors.
+                return 0;
+            }
+        }
+
+        case BBJ_CALLFINALLY:
+        case BBJ_ALWAYS:
+        case BBJ_EHCATCHRET:
+        case BBJ_EHFILTERRET:
+        case BBJ_LEAVE:
+        case BBJ_NONE:
+            return 1;
+
+        case BBJ_COND:
+            if (bbJumpDest == bbNext)
+            {
+                return 1;
+            }
+            else
+            {
+                return 2;
+            }
+
+        case BBJ_SWITCH:
+        {
+            Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+            return sd.numDistinctSuccs;
+        }
+
+        default:
+            unreached();
+    }
+}
+
+//------------------------------------------------------------------------
+// GetSucc: Returns the requested block successor. See the declaration comment for details.
+//
+// Arguments:
+//    i - index of successor to return. 0 <= i < NumSucc(comp).
+//    comp - Compiler instance
+//
+// Return Value:
+//    Requested successor block
+//
+BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
+{
+    assert(comp != nullptr);
+
+    assert(i < NumSucc(comp)); // Index bounds check.
+    switch (bbJumpKind)
+    {
+        case BBJ_EHFILTERRET:
+        {
+            // Handler is the (sole) normal successor of the filter.
+            assert(comp->fgFirstBlockOfHandler(this) == bbJumpDest);
+            return bbJumpDest;
+        }
+
+        case BBJ_EHFINALLYRET:
+            // Note: the following call is expensive.
+            return comp->fgSuccOfFinallyRet(this, i);
+
+        case BBJ_CALLFINALLY:
+        case BBJ_ALWAYS:
+        case BBJ_EHCATCHRET:
+        case BBJ_LEAVE:
+            return bbJumpDest;
+
+        case BBJ_NONE:
+            return bbNext;
+
+        case BBJ_COND:
+            if (i == 0)
+            {
+                return bbNext;
+            }
+            else
+            {
+                assert(i == 1);
+                return bbJumpDest;
+            }
+
+        case BBJ_SWITCH:
+        {
+            Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+            assert(i < sd.numDistinctSuccs); // Range check.
+ return sd.nonDuplicates[i]; + } + + default: + unreached(); + } +} + +void BasicBlock::InitVarSets(Compiler* comp) +{ + VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp)); + VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp)); + VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp)); + VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp)); + VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp)); + + bbMemoryUse = emptyMemoryKindSet; + bbMemoryDef = emptyMemoryKindSet; + bbMemoryLiveIn = emptyMemoryKindSet; + bbMemoryLiveOut = emptyMemoryKindSet; +} + +// Returns true if the basic block ends with GT_JMP +bool BasicBlock::endsWithJmpMethod(Compiler* comp) +{ + if (comp->compJmpOpUsed && (bbJumpKind == BBJ_RETURN) && (bbFlags & BBF_HAS_JMP)) + { + GenTree* lastNode = this->lastNode(); + assert(lastNode != nullptr); + return lastNode->OperGet() == GT_JMP; + } + + return false; +} + +// Returns true if the basic block ends with either +// i) GT_JMP or +// ii) tail call (implicit or explicit) +// +// Params: +// comp - Compiler instance +// fastTailCallsOnly - Only consider fast tail calls excluding tail calls via helper. +// +bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/) +{ + GenTreePtr tailCall = nullptr; + bool tailCallsConvertibleToLoopOnly = false; + return endsWithJmpMethod(comp) || + endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall); +} + +//------------------------------------------------------------------------------ +// endsWithTailCall : Check if the block ends with a tail call. +// +// Arguments: +// comp - compiler instance +// fastTailCallsOnly - check for fast tail calls only +// tailCallsConvertibleToLoopOnly - check for tail calls convertible to loop only +// tailCall - a pointer to a tree that will be set to the call tree if the block +// ends with a tail call and will be set to nullptr otherwise. +// +// Return Value: +// true if the block ends with a tail call; false otherwise. +// +// Notes: +// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true. +// +bool BasicBlock::endsWithTailCall(Compiler* comp, + bool fastTailCallsOnly, + bool tailCallsConvertibleToLoopOnly, + GenTree** tailCall) +{ + assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly); + *tailCall = nullptr; + bool result = false; + + // Is this a tail call? + // The reason for keeping this under RyuJIT is so as not to impact existing Jit32 x86 and arm + // targets. 
+    if (comp->compTailCallUsed)
+    {
+        if (fastTailCallsOnly || tailCallsConvertibleToLoopOnly)
+        {
+            // Only fast tail calls or only tail calls convertible to loops
+            result = (bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN);
+        }
+        else
+        {
+            // Fast tail calls, tail calls convertible to loops, and tail calls dispatched via helper
+            result = (bbJumpKind == BBJ_THROW) || ((bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN));
+        }
+
+        if (result)
+        {
+            GenTree* lastNode = this->lastNode();
+            if (lastNode->OperGet() == GT_CALL)
+            {
+                GenTreeCall* call = lastNode->AsCall();
+                if (tailCallsConvertibleToLoopOnly)
+                {
+                    result = call->IsTailCallConvertibleToLoop();
+                }
+                else if (fastTailCallsOnly)
+                {
+                    result = call->IsFastTailCall();
+                }
+                else
+                {
+                    result = call->IsTailCall();
+                }
+
+                if (result)
+                {
+                    *tailCall = call;
+                }
+            }
+            else
+            {
+                result = false;
+            }
+        }
+    }
+
+    return result;
+}
+
+//------------------------------------------------------------------------------
+// endsWithTailCallConvertibleToLoop : Check if the block ends with a tail call convertible to loop.
+//
+// Arguments:
+//    comp - compiler instance
+//    tailCall - a pointer to a tree that will be set to the call tree if the block
+//               ends with a tail call convertible to loop and will be set to nullptr otherwise.
+//
+// Return Value:
+//    true if the block ends with a tail call convertible to loop.
+//
+bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall)
+{
+    bool fastTailCallsOnly              = false;
+    bool tailCallsConvertibleToLoopOnly = true;
+    return endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, tailCall);
+}
+
+/*****************************************************************************
+ *
+ *  Allocate a basic block but don't append it to the current BB list.
+ */
+
+BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
+{
+    BasicBlock* block;
+
+    /* Allocate the block descriptor and zero it out */
+    assert(fgSafeBasicBlockCreation);
+
+    block = new (this, CMK_BasicBlock) BasicBlock;
+
+#if MEASURE_BLOCK_SIZE
+    BasicBlock::s_Count += 1;
+    BasicBlock::s_Size += sizeof(*block);
+#endif
+
+#ifdef DEBUG
+    // fgLookupBB() is invalid until fgInitBBLookup() is called again.
+    fgBBs = (BasicBlock**)0xCDCD;
+#endif
+
+    // TODO-Throughput: The following memset is pretty expensive - do something else?
+ // Note that some fields have to be initialized to 0 (like bbFPStateX87) + memset(block, 0, sizeof(*block)); + + // scopeInfo needs to be able to differentiate between blocks which + // correspond to some instrs (and so may have some LocalVarInfo + // boundaries), or have been inserted by the JIT + block->bbCodeOffs = BAD_IL_OFFSET; + block->bbCodeOffsEnd = BAD_IL_OFFSET; + + /* Give the block a number, set the ancestor count and weight */ + + ++fgBBcount; + + if (compIsForInlining()) + { + block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax; + } + else + { + block->bbNum = ++fgBBNumMax; + } + +#ifndef LEGACY_BACKEND + if (compRationalIRForm) + { + block->bbFlags |= BBF_IS_LIR; + } +#endif // !LEGACY_BACKEND + + block->bbRefs = 1; + block->bbWeight = BB_UNITY_WEIGHT; + + block->bbStkTempsIn = NO_BASE_TMP; + block->bbStkTempsOut = NO_BASE_TMP; + + block->bbEntryState = nullptr; + + /* Record the jump kind in the block */ + + block->bbJumpKind = jumpKind; + + if (jumpKind == BBJ_THROW) + { + block->bbSetRunRarely(); + } + +#ifdef DEBUG + if (verbose) + { + printf("New Basic Block BB%02u [%p] created.\n", block->bbNum, dspPtr(block)); + } +#endif + + // We will give all the blocks var sets after the number of tracked variables + // is determined and frozen. After that, if we dynamically create a basic block, + // we will initialize its var sets. + if (fgBBVarSetsInited) + { + VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this)); + } + else + { + VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal()); + VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal()); + VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal()); + VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal()); + VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal()); + } + + block->bbMemoryUse = emptyMemoryKindSet; + block->bbMemoryDef = emptyMemoryKindSet; + block->bbMemoryLiveIn = emptyMemoryKindSet; + block->bbMemoryLiveOut = emptyMemoryKindSet; + + for (MemoryKind memoryKind : allMemoryKinds()) + { + block->bbMemorySsaPhiFunc[memoryKind] = nullptr; + block->bbMemorySsaNumIn[memoryKind] = 0; + block->bbMemorySsaNumOut[memoryKind] = 0; + } + + // Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index. + static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP); + + block->bbNatLoopNum = BasicBlock::NOT_IN_LOOP; + + return block; +} diff --git a/src/jit/block.h b/src/jit/block.h index 752219bdb7..d67891d4dd 100644 --- a/src/jit/block.h +++ b/src/jit/block.h @@ -139,10 +139,9 @@ enum ThisInitState struct EntryState { - ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use - // fewer bits here) - unsigned esStackDepth : 24; // size of esStack - StackEntry* esStack; // ptr to stack + ThisInitState thisInitialized; // used to track whether the this ptr is initialized. 
+ unsigned esStackDepth; // size of esStack + StackEntry* esStack; // ptr to stack }; // Enumeration of the kinds of memory whose state changes the compiler tracks @@ -706,33 +705,42 @@ struct BasicBlock : private LIR::Range BBswtDesc* bbJumpSwt; // switch descriptor }; - // NumSucc() gives the number of successors, and GetSucc() allows one to iterate over them. + // NumSucc() gives the number of successors, and GetSucc() returns a given numbered successor. // - // The behavior of both for blocks that end in BBJ_EHFINALLYRET (a return from a finally or fault block) - // depends on whether "comp" is non-null. If it is null, then the block is considered to have no - // successor. If it is non-null, we figure out the actual successors. Some cases will want one behavior, - // other cases the other. For example, IL verification requires that these blocks end in an empty operand + // There are two versions of these functions: ones that take a Compiler* and ones that don't. You must + // always use a matching set. Thus, if you call NumSucc() without a Compiler*, you must also call + // GetSucc() without a Compiler*. + // + // The behavior of NumSucc()/GetSucc() is different when passed a Compiler* for blocks that end in: + // (1) BBJ_EHFINALLYRET (a return from a finally or fault block) + // (2) BBJ_EHFILTERRET (a return from EH filter block) + // (3) BBJ_SWITCH + // + // For BBJ_EHFINALLYRET, if no Compiler* is passed, then the block is considered to have no + // successor. If Compiler* is passed, we figure out the actual successors. Some cases will want one behavior, + // other cases the other. For example, IL verification requires that these blocks end in an empty operand // stack, and since the dataflow analysis of IL verification is concerned only with the contents of the // operand stack, we can consider the finally block to have no successors. But a more general dataflow // analysis that is tracking the contents of local variables might want to consider *all* successors, // and would pass the current Compiler object. // - // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null, - // NumSucc/GetSucc yields the first block of the try blocks handler. + // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if Compiler* is not passed; if + // Compiler* is passed, NumSucc/GetSucc yields the first block of the try block's handler. // - // Also, the behavior for switches changes depending on the value of "comp". If it is null, then all - // switch successors are returned. If it is non-null, then only unique switch successors are returned; - // the duplicate successors are omitted. + // For BBJ_SWITCH, if Compiler* is not passed, then all switch successors are returned. If Compiler* + // is passed, then only unique switch successors are returned; the duplicate successors are omitted. // // Note that for BBJ_COND, which has two successors (fall through and condition true branch target), // only the unique targets are returned. Thus, if both targets are the same, NumSucc() will only return 1 // instead of 2. - // - // Returns the number of successors of "this". - unsigned NumSucc(Compiler* comp = nullptr); - // Returns the "i"th successor. Requires (0 <= i < NumSucc()). - BasicBlock* GetSucc(unsigned i, Compiler* comp = nullptr); + // NumSucc: Returns the number of successors of "this". + unsigned NumSucc(); + unsigned NumSucc(Compiler* comp); + + // GetSucc: Returns the "i"th successor. Requires (0 <= i < NumSucc()). 
+ BasicBlock* GetSucc(unsigned i); + BasicBlock* GetSucc(unsigned i, Compiler* comp); BasicBlock* GetUniquePred(Compiler* comp); @@ -1067,8 +1075,6 @@ struct BasicBlock : private LIR::Range GenTree* firstNode(); GenTree* lastNode(); - bool containsStatement(GenTree* statement); - bool endsWithJmpMethod(Compiler* comp); bool endsWithTailCall(Compiler* comp, diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 41bd8040ac..c28b27bf9b 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -166,9 +166,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre if (targetType == TYP_FLOAT) { // Get a temp integer register - regMaskTP tmpRegMask = tree->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - assert(tmpReg != REG_NA); + regNumber tmpReg = tree->GetSingleTempReg(); float f = forceCastToFloat(constValue); genSetRegToIcon(tmpReg, *((int*)(&f))); @@ -181,15 +179,8 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre unsigned* cv = (unsigned*)&constValue; // Get two temp integer registers - regMaskTP tmpRegsMask = tree->gtRsvdRegs; - regMaskTP tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg1 = genRegNumFromMask(tmpRegMask); - assert(tmpReg1 != REG_NA); - - tmpRegsMask &= ~genRegMask(tmpReg1); // remove the bit for 'tmpReg1' - tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg2 = genRegNumFromMask(tmpRegMask); - assert(tmpReg2 != REG_NA); + regNumber tmpReg1 = tree->ExtractTempReg(); + regNumber tmpReg2 = tree->GetSingleTempReg(); genSetRegToIcon(tmpReg1, cv[0]); genSetRegToIcon(tmpReg2, cv[1]); @@ -225,14 +216,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) var_types targetType = treeNode->TypeGet(); emitter* emit = getEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_LO || oper == GT_ADD_HI || oper == GT_SUB_LO || - oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND); - - if ((oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI) && treeNode->gtOverflow()) - { - // This is also checked in the importer. - NYI("Overflow not yet implemented"); - } + assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_ADD_LO || oper == GT_ADD_HI || + oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND); GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); @@ -412,32 +397,9 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_SUB_HI: case GT_ADD: case GT_SUB: - genConsumeOperands(treeNode->AsOp()); - genCodeForBinary(treeNode); - break; - case GT_MUL: - { genConsumeOperands(treeNode->AsOp()); - - const genTreeOps oper = treeNode->OperGet(); - if (treeNode->gtOverflow()) - { - // This is also checked in the importer. 
- NYI("Overflow not yet implemented"); - } - - GenTreePtr op1 = treeNode->gtGetOp1(); - GenTreePtr op2 = treeNode->gtGetOp2(); - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); - - // The arithmetic node must be sitting in a register (since it's not contained) - noway_assert(targetReg != REG_NA); - - regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2); - assert(r == targetReg); - } - genProduceReg(treeNode); + genCodeForBinary(treeNode); break; case GT_LSH: @@ -529,7 +491,21 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; assert(varNum < compiler->lvaCount); - emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs); + if (varTypeIsFloating(targetType)) + { + if (treeNode->InReg()) + { + NYI("GT_LCL_FLD with reg-to-reg floating point move"); + } + else + { + emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); + } + } + else + { + emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs); + } } genProduceReg(treeNode); break; @@ -804,22 +780,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_JTRUE: - { - GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal(); - assert(cmp->OperIsCompare()); - assert(compiler->compCurBB->bbJumpKind == BBJ_COND); - - // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp - // is governed by a flag NOT by the inherent type of the node - // TODO-ARM-CQ: Check if we can use the currently set flags. - CompareKind compareKind = ((cmp->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; - - emitJumpKind jmpKind = genJumpKindForOper(cmp->gtOper, compareKind); - BasicBlock* jmpTarget = compiler->compCurBB->bbJumpDest; - - inst_JMP(jmpKind, jmpTarget); - } - break; + genCodeForJumpTrue(treeNode); + break; case GT_JCC: { @@ -1013,8 +975,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_NULLCHECK: { assert(!treeNode->gtOp.gtOp1->isContained()); - regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); - emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0); + regNumber addrReg = genConsumeReg(treeNode->gtOp.gtOp1); + emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, targetReg, addrReg, 0); } break; @@ -1129,7 +1091,6 @@ void CodeGen::genLclHeap(GenTreePtr tree) // Also it used as temporary register in code generation // for storing allocation size regNumber regCnt = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); @@ -1198,10 +1159,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~pspSymRegMask; - pspSymReg = genRegNumFromMask(pspSymRegMask); + pspSymReg = tree->ExtractTempReg(); getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif @@ -1266,12 +1224,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. 
- assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) >= 1); - - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regNumber regTmp = genRegNumFromMask(regCntMask); + regNumber regTmp = tree->ExtractTempReg(); instGen_Set_Reg_To_Zero(EA_PTRSIZE, regTmp); // Loop: @@ -1285,7 +1238,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // If not done, loop // Note that regCnt is the number of bytes to stack allocate. assert(genIsValidIntReg(regCnt)); - getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regCnt, regCnt, STACK_ALIGN); + getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, regCnt, STACK_ALIGN, INS_FLAGS_SET); emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); inst_JMP(jmpNotEqual, loop); } @@ -1323,15 +1276,13 @@ void CodeGen::genLclHeap(GenTreePtr tree) // // Setup the regTmp - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->ExtractTempReg(); BasicBlock* loop = genCreateTempLabel(); BasicBlock* done = genCreateTempLabel(); // subs regCnt, SP, regCnt // regCnt now holds ultimate SP - getEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt); + getEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt, INS_FLAGS_SET); inst_JMP(EJ_vc, loop); // branch if the V flag is not set diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 7f98221df8..7de19f9043 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -906,7 +906,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #endif assert(block != NULL); - assert(block->bbFlags && BBF_FUNCLET_BEG); + assert(block->bbFlags & BBF_FUNCLET_BEG); ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); @@ -1441,9 +1441,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre else { // Get a temp integer register to compute long address. - regMaskTP addrRegMask = tree->gtRsvdRegs; - regNumber addrReg = genRegNumFromMask(addrRegMask); - noway_assert(addrReg != REG_NA); + regNumber addrReg = tree->GetSingleTempReg(); // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. @@ -2441,30 +2439,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_JTRUE: - { - GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal(); - assert(cmp->OperIsCompare()); - assert(compiler->compCurBB->bbJumpKind == BBJ_COND); - - // Get the "kind" and type of the comparison. 
Note that whether it is an unsigned cmp - // is governed by a flag NOT by the inherent type of the node - emitJumpKind jumpKind[2]; - bool branchToTrueLabel[2]; - genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); - assert(jumpKind[0] != EJ_NONE); - - // On Arm64 the branches will always branch to the true label - assert(branchToTrueLabel[0]); - inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest); - - if (jumpKind[1] != EJ_NONE) - { - // the second conditional branch always has to be to the true label - assert(branchToTrueLabel[1]); - inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); - } - } - break; + genCodeForJumpTrue(treeNode); + break; case GT_RETURNTRAP: { @@ -2854,7 +2830,6 @@ void CodeGen::genLclHeap(GenTreePtr tree) noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); regNumber targetReg = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber regCnt = REG_NA; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); @@ -2923,17 +2898,16 @@ void CodeGen::genLclHeap(GenTreePtr tree) // since we don't need any internal registers. if (!hasPspSym && compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); if (regCnt != targetReg) + { inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); + } } // Align to STACK_ALIGN @@ -2950,10 +2924,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~pspSymRegMask; - pspSymReg = genRegNumFromMask(pspSymRegMask); + pspSymReg = tree->ExtractTempReg(); getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif @@ -3021,15 +2992,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) assert(regCnt == REG_NA); if (!hasPspSym && compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); } genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG); } @@ -3094,9 +3062,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // // Setup the regTmp - assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->GetSingleTempReg(); BasicBlock* loop = genCreateTempLabel(); BasicBlock* done = genCreateTempLabel(); @@ -3391,13 +3357,11 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) unsigned offset = 0; // Grab the integer temp register to emit the loads and stores. 
- regMaskTP tmpMask = genFindLowestBit(cpBlkNode->gtRsvdRegs & RBM_ALLINT); - regNumber tmpReg = genRegNumFromMask(tmpMask); + regNumber tmpReg = cpBlkNode->ExtractTempReg(RBM_ALLINT); if (size >= 2 * REGSIZE_BYTES) { - regMaskTP tmp2Mask = cpBlkNode->gtRsvdRegs & RBM_ALLINT & ~tmpMask; - regNumber tmp2Reg = genRegNumFromMask(tmp2Mask); + regNumber tmp2Reg = cpBlkNode->ExtractTempReg(RBM_ALLINT); size_t slots = size / (2 * REGSIZE_BYTES); @@ -3498,13 +3462,8 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); // Temp register used to perform the sequence of loads and stores. - regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs); - -#ifdef DEBUG - assert(cpObjNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(cpObjNode->gtRsvdRegs) == 1); + regNumber tmpReg = cpObjNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); -#endif // DEBUG unsigned slots = cpObjNode->gtSlots; emitter* emit = getEmitter(); @@ -3571,7 +3530,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // load the ip-relative offset (which is relative to start of fgFirstBB) getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL); @@ -4022,9 +3981,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) if (offset != 0) { - regMaskTP tmpRegMask = lea->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = lea->GetSingleTempReg(); if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE)) { @@ -4041,7 +3998,6 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) // Then compute target reg from [tmpReg + offset] emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset); - ; } else // large offset { @@ -4091,9 +4047,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = lea->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = lea->GetSingleTempReg(); // First load tmpReg with the large offset constant instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -4116,131 +4070,6 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea) } //------------------------------------------------------------------------------------------- -// genJumpKindsForTree: Determine the number and kinds of conditional branches -// necessary to implement the given GT_CMP node -// -// Arguments: -// cmpTree - (input) The GenTree node that is used to set the Condition codes -// - The GenTree Relop node that was used to set the Condition codes -// jmpKind[2] - (output) One or two conditional branch instructions -// jmpToTrueLabel[2] - (output) On Arm64 both branches will always branch to the true label -// -// Return Value: -// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[] -// -// Assumptions: -// At least one conditional branch instruction will be returned. 
-// Typically only one conditional branch is needed -// and the second jmpKind[] value is set to EJ_NONE -//------------------------------------------------------------------------------------------- - -// static -void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]) -{ - // On Arm64 both branches will always branch to the true label - jmpToTrueLabel[0] = true; - jmpToTrueLabel[1] = true; - - // For integer comparisons just use genJumpKindForOper - if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal())) - { - CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; - jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind); - jmpKind[1] = EJ_NONE; - } - else // We have a Floating Point Compare operation - { - assert(cmpTree->OperIsCompare()); - - // For details on this mapping, see the ARM64 Condition Code - // table at section C1.2.3 in the ARMV8 architecture manual - // - - // We must check the GTF_RELOP_NAN_UN to find out - // if we need to branch when we have a NaN operand. - // - if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0) - { - // Must branch if we have an NaN, unordered - switch (cmpTree->gtOper) - { - case GT_EQ: - jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's) - jmpKind[1] = EJ_vs; // branch or set when we have a NaN - break; - - case GT_NE: - jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_LT: - jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_LE: - jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_GT: - jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_GE: - jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's) - jmpKind[1] = EJ_NONE; - break; - - default: - unreached(); - } - } - else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0) - { - // Do not branch if we have an NaN, unordered - switch (cmpTree->gtOper) - { - case GT_EQ: - jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_NE: - jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's) - jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's) - break; - - case GT_LT: - jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_LE: - jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_GT: - jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's) - jmpKind[1] = EJ_NONE; - break; - - case GT_GE: - jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's) - jmpKind[1] = EJ_NONE; - break; - - default: - unreached(); - } - } - } -} - -//------------------------------------------------------------------------------------------- // genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value // corresponding to a binary Relational operator result. // @@ -4484,9 +4313,8 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) emitter* emit = getEmitter(); // Extract exponent into a register. 
- regNumber intReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber intReg = treeNode->GetSingleTempReg(); regNumber fpReg = genConsumeReg(op1); - assert(intReg != REG_NA); emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), intReg, fpReg); emit->emitIns_R_R_I(INS_lsr, emitTypeSize(targetType), intReg, intReg, shiftAmount); diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index af9fdfed9c..c541472284 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -212,13 +212,10 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // We will copy this struct to the stack, possibly using a ldp instruction // Setup loReg and hiReg from the internal registers that we reserved in lower. // - regNumber loReg = REG_NA; - regNumber hiReg = REG_NA; + regNumber loReg = treeNode->ExtractTempReg(); + regNumber hiReg = treeNode->GetSingleTempReg(); regNumber addrReg = REG_NA; - // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers - genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg); - GenTreeLclVarCommon* varNode = nullptr; GenTreePtr addrNode = nullptr; @@ -709,15 +706,9 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) regNumber tgtReg = arrIndex->gtRegNum; noway_assert(tgtReg != REG_NA); - // We will use a temp register to load the lower bound and dimension size values - // - regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set - tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask' - - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - noway_assert(tmpReg != REG_NA); + // We will use a temp register to load the lower bound and dimension size values. + regNumber tmpReg = arrIndex->GetSingleTempReg(); assert(tgtReg != tmpReg); unsigned dim = arrIndex->gtCurrDim; @@ -773,25 +764,17 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) noway_assert(indexReg != REG_NA); noway_assert(arrReg != REG_NA); - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = arrOffset->GetSingleTempReg(); unsigned dim = arrOffset->gtCurrDim; unsigned rank = arrOffset->gtArrRank; var_types elemType = arrOffset->gtArrElemType; unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); -// Load tmpReg with the dimension size and evaluate -// tgtReg = offsetReg*dim_size + indexReg. -#if defined(_TARGET_ARM_) - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R_R(INS_MUL, EA_4BYTE, tgtReg, tmpReg, offsetReg); - emit->emitIns_R_R_R(INS_add, EA_4BYTE, tgtReg, tgtReg, indexReg); -#elif defined(_TARGET_ARM64_) - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load - emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg); -#endif // _TARGET_* + // Load tmpReg with the dimension size and evaluate + // tgtReg = offsetReg*tmpReg + indexReg. 
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); + emit->emitIns_R_R_R_R(INS_MULADD, EA_PTRSIZE, tgtReg, tmpReg, offsetReg, indexReg); } else { @@ -1073,12 +1056,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) const regNumber regThis = genGetThisArgReg(call); #if defined(_TARGET_ARM_) - regMaskTP tempMask = genFindLowestBit(call->gtRsvdRegs); - const regNumber tmpReg = genRegNumFromMask(tempMask); - if (genCountBits(call->gtRsvdRegs) > 1) - { - call->gtRsvdRegs &= ~tempMask; - } + const regNumber tmpReg = call->ExtractTempReg(); getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0); #elif defined(_TARGET_ARM64_) getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0); @@ -1239,7 +1217,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) #ifdef _TARGET_ARM_ if (!arm_Valid_Imm_For_BL((ssize_t)addr)) { - regNumber tmpReg = genRegNumFromMask(call->gtRsvdRegs); + regNumber tmpReg = call->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr); genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg); } @@ -1388,7 +1366,7 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) regNumber sourceReg = castOp->gtRegNum; // For Long to Int conversion we will have a reserved integer register to hold the immediate mask - regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = (treeNode->AvailableTempRegCount() == 0) ? REG_NA : treeNode->GetSingleTempReg(); assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); @@ -1428,7 +1406,20 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) // we only have to check for any bits set in 'typeMask' noway_assert(castInfo.typeMask != 0); +#if defined(_TARGET_ARM_) + if (arm_Valid_Imm_For_Instr(INS_tst, castInfo.typeMask, INS_FLAGS_DONT_CARE)) + { + emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask); + } + else + { + noway_assert(tmpReg != REG_NA); + instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMask); + emit->emitIns_R_R(INS_tst, cmpSize, sourceReg, tmpReg); + } +#elif defined(_TARGET_ARM64_) emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask); +#endif // _TARGET_ARM* emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED); genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW); } @@ -1682,6 +1673,163 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface } +//------------------------------------------------------------------------------------------- +// genJumpKindsForTree: Determine the number and kinds of conditional branches +// necessary to implement the given GT_CMP node +// +// Arguments: +// cmpTree - (input) The GenTree node that is used to set the Condition codes +// - The GenTree Relop node that was used to set the Condition codes +// jmpKind[2] - (output) One or two conditional branch instructions +// jmpToTrueLabel[2] - (output) On Arm64 both branches will always branch to the true label +// +// Return Value: +// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[] +// +// Assumptions: +// At least one conditional branch instruction will be returned. 
+// Typically only one conditional branch is needed +// and the second jmpKind[] value is set to EJ_NONE +// +void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]) +{ + // On ARM both branches will always branch to the true label + jmpToTrueLabel[0] = true; + jmpToTrueLabel[1] = true; + + // For integer comparisons just use genJumpKindForOper + if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal())) + { + CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; + jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind); + jmpKind[1] = EJ_NONE; + } + else // We have a Floating Point Compare operation + { + assert(cmpTree->OperIsCompare()); + + // For details on this mapping, see the ARM Condition Code table + // at section A8.3 in the ARMv7 architecture manual or + // at section C1.2.3 in the ARMV8 architecture manual. + + // We must check the GTF_RELOP_NAN_UN to find out + // if we need to branch when we have a NaN operand. + // + if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0) + { + // Must branch if we have an NaN, unordered + switch (cmpTree->gtOper) + { + case GT_EQ: + jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's) + jmpKind[1] = EJ_vs; // branch or set when we have a NaN + break; + + case GT_NE: + jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_LT: + jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_LE: + jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_GT: + jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_GE: + jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's) + jmpKind[1] = EJ_NONE; + break; + + default: + unreached(); + } + } + else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0) + { + // Do not branch if we have an NaN, unordered + switch (cmpTree->gtOper) + { + case GT_EQ: + jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_NE: + jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's) + jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's) + break; + + case GT_LT: + jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_LE: + jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_GT: + jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's) + jmpKind[1] = EJ_NONE; + break; + + case GT_GE: + jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's) + jmpKind[1] = EJ_NONE; + break; + + default: + unreached(); + } + } + } +} + +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generates code for jmpTrue statement. +// +// Arguments: +// tree - The GT_JTRUE tree node. +// +// Return Value: +// None +// +void CodeGen::genCodeForJumpTrue(GenTreePtr tree) +{ + GenTree* cmp = tree->gtOp.gtOp1->gtEffectiveVal(); + assert(cmp->OperIsCompare()); + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + // Get the "kind" and type of the comparison. 
Note that whether it is an unsigned cmp + // is governed by a flag NOT by the inherent type of the node + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); + assert(jumpKind[0] != EJ_NONE); + + // On ARM the branches will always branch to the true label + assert(branchToTrueLabel[0]); + inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest); + + if (jumpKind[1] != EJ_NONE) + { + // the second conditional branch always has to be to the true label + assert(branchToTrueLabel[1]); + inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); + } +} + #endif // _TARGET_ARMARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 89d6a4ca34..64561de567 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -557,30 +557,6 @@ regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree) return regMask; } -//------------------------------------------------------------------------ -// getRegistersFromMask: Given a register mask return the two registers -// specified by the mask. -// -// Arguments: -// regPairMask: a register mask that has exactly two bits set -// Return values: -// pLoReg: the address of where to write the first register -// pHiReg: the address of where to write the second register -// -void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg) -{ - assert(genCountBits(regPairMask) == 2); - - regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask - regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in tmpRegMask - - regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask - regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask - - *pLoReg = loReg; - *pHiReg = hiReg; -} - // The given lclVar is either going live (being born) or dying. // It might be both going live and dying (that is, it is a dead store) under MinOpts. // Update regSet.rsMaskVars accordingly. @@ -9917,7 +9893,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) #endif assert(block != NULL); - assert(block->bbFlags && BBF_FUNCLET_BEG); + assert(block->bbFlags & BBF_FUNCLET_BEG); ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true); diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h index 08d854eaaf..a993ddb629 100644 --- a/src/jit/codegeninterface.h +++ b/src/jit/codegeninterface.h @@ -149,8 +149,6 @@ protected: regMaskTP genLiveMask(VARSET_VALARG_TP liveSet); #endif - void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg); - // The following property indicates whether the current method sets up // an explicit stack frame or not. 
private: diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index fa0c85c749..715e87a944 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -268,6 +268,8 @@ void genJmpMethod(GenTreePtr jmp); BasicBlock* genCallFinally(BasicBlock* block); +void genCodeForJumpTrue(GenTreePtr tree); + #if FEATURE_EH_FUNCLETS void genEHCatchRet(BasicBlock* block); #else // !FEATURE_EH_FUNCLETS diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 23c2a186a4..d693ff914a 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -617,7 +617,7 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node) // xor edx, edx // div divisor->gtRegNum // mov eax, temp - const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs); + const regNumber tempReg = node->GetSingleTempReg(); inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); @@ -1234,6 +1234,66 @@ void CodeGen::genReturn(GenTreePtr treeNode) #endif } +//------------------------------------------------------------------------ +// genCodeForJumpTrue: Generates code for jmpTrue statement. +// +// Arguments: +// tree - The GT_JTRUE tree node. +// +// Return Value: +// None +// +void CodeGen::genCodeForJumpTrue(GenTreePtr tree) +{ + GenTree* cmp = tree->gtOp.gtOp1; + + assert(cmp->OperIsCompare()); + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + +#if !defined(_TARGET_64BIT_) + // Long-typed compares should have been handled by Lowering::LowerCompare. + assert(!varTypeIsLong(cmp->gtGetOp1())); +#endif + + // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp + // is governed by a flag NOT by the inherent type of the node + // TODO-XArch-CQ: Check if we can use the currently set flags. + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); + + BasicBlock* skipLabel = nullptr; + if (jumpKind[0] != EJ_NONE) + { + BasicBlock* jmpTarget; + if (branchToTrueLabel[0]) + { + jmpTarget = compiler->compCurBB->bbJumpDest; + } + else + { + // This case arises only for ordered GT_EQ right now + assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); + skipLabel = genCreateTempLabel(); + jmpTarget = skipLabel; + } + + inst_JMP(jumpKind[0], jmpTarget); + } + + if (jumpKind[1] != EJ_NONE) + { + // the second conditional branch always has to be to the true label + assert(branchToTrueLabel[1]); + inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); + } + + if (skipLabel != nullptr) + { + genDefineTempLabel(skipLabel); + } +} + /***************************************************************************** * * Generate code for a single node in the tree. @@ -1844,56 +1904,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_JTRUE: - { - GenTree* cmp = treeNode->gtOp.gtOp1; - - assert(cmp->OperIsCompare()); - assert(compiler->compCurBB->bbJumpKind == BBJ_COND); - -#if !defined(_TARGET_64BIT_) - // Long-typed compares should have been handled by Lowering::LowerCompare. - assert(!varTypeIsLong(cmp->gtGetOp1())); -#endif - - // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp - // is governed by a flag NOT by the inherent type of the node - // TODO-XArch-CQ: Check if we can use the currently set flags. 
- emitJumpKind jumpKind[2]; - bool branchToTrueLabel[2]; - genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); - - BasicBlock* skipLabel = nullptr; - if (jumpKind[0] != EJ_NONE) - { - BasicBlock* jmpTarget; - if (branchToTrueLabel[0]) - { - jmpTarget = compiler->compCurBB->bbJumpDest; - } - else - { - // This case arises only for ordered GT_EQ right now - assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); - skipLabel = genCreateTempLabel(); - jmpTarget = skipLabel; - } - - inst_JMP(jumpKind[0], jmpTarget); - } - - if (jumpKind[1] != EJ_NONE) - { - // the second conditional branch always has to be to the true label - assert(branchToTrueLabel[1]); - inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); - } - - if (skipLabel != nullptr) - { - genDefineTempLabel(skipLabel); - } - } - break; + genCodeForJumpTrue(treeNode); + break; case GT_JCC: { @@ -1924,9 +1936,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) inst_JMP(jmpEqual, skipLabel); // emit the call to the EE-helper that stops for GC (or other reasons) - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg); @@ -2431,12 +2441,11 @@ void CodeGen::genLclHeap(GenTreePtr tree) GenTreePtr size = tree->gtOp.gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); - regNumber targetReg = tree->gtRegNum; - regMaskTP tmpRegsMask = tree->gtRsvdRegs; - regNumber regCnt = REG_NA; - var_types type = genActualType(size->gtType); - emitAttr easz = emitTypeSize(type); - BasicBlock* endLabel = nullptr; + regNumber targetReg = tree->gtRegNum; + regNumber regCnt = REG_NA; + var_types type = genActualType(size->gtType); + emitAttr easz = emitTypeSize(type); + BasicBlock* endLabel = nullptr; #ifdef DEBUG // Verify ESP @@ -2494,15 +2503,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) // since we don't need any internal registers. if (compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); if (regCnt != targetReg) { // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary. @@ -2594,15 +2600,12 @@ void CodeGen::genLclHeap(GenTreePtr tree) assert(regCnt == REG_NA); if (compiler->info.compInitMem) { - assert(genCountBits(tmpRegsMask) == 0); + assert(tree->AvailableTempRegCount() == 0); regCnt = targetReg; } else { - assert(genCountBits(tmpRegsMask) >= 1); - regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~regCntMask; - regCnt = genRegNumFromMask(regCntMask); + regCnt = tree->ExtractTempReg(); } } @@ -2717,9 +2720,7 @@ void CodeGen::genLclHeap(GenTreePtr tree) // This is a harmless trick to avoid the emitter trying to track the // decrement of the ESP - we do the subtraction in another reg instead // of adjusting ESP directly. 
- assert(tmpRegsMask != RBM_NONE); - assert(genCountBits(tmpRegsMask) == 1); - regNumber regTmp = genRegNumFromMask(tmpRegsMask); + regNumber regTmp = tree->GetSingleTempReg(); inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); @@ -2922,13 +2923,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) // Perform an unroll using SSE2 loads and stores. if (size >= XMM_REGSIZE_BYTES) { - regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs); - -#ifdef DEBUG - assert(initBlkNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(initBlkNode->gtRsvdRegs) == 1); + regNumber tmpReg = initBlkNode->GetSingleTempReg(); assert(genIsValidFloatReg(tmpReg)); -#endif // DEBUG if (initVal->gtIntCon.gtIconVal != 0) { @@ -3122,8 +3118,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if (size >= XMM_REGSIZE_BYTES) { - assert(cpBlkNode->gtRsvdRegs != RBM_NONE); - regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT); + regNumber xmmReg = cpBlkNode->GetSingleTempReg(RBM_ALLFLOAT); assert(genIsValidFloatReg(xmmReg)); size_t slots = size / XMM_REGSIZE_BYTES; @@ -3144,7 +3139,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) if ((size & 0xf) != 0) { // Grab the integer temp register to emit the remaining loads and stores. - regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT); + regNumber tmpReg = cpBlkNode->GetSingleTempReg(RBM_ALLINT); if ((size & 8) != 0) { @@ -3390,22 +3385,22 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) // less than 16 bytes, we will just be using pushes if (size >= 8) { - xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); longTmpReg = xmmTmpReg; } if ((size & 0x7) != 0) { - intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); } #else // !_TARGET_X86_ // On x64 we use an XMM register only for 16-byte chunks. if (size >= XMM_REGSIZE_BYTES) { - xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + xmmTmpReg = putArgNode->GetSingleTempReg(RBM_ALLFLOAT); } if ((size & 0xf) != 0) { - intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + intTmpReg = putArgNode->GetSingleTempReg(RBM_ALLINT); longTmpReg = intTmpReg; } #endif // !_TARGET_X86_ @@ -3418,7 +3413,6 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) #ifdef _TARGET_X86_ assert(!m_pushStkArg); #endif // _TARGET_X86_ - assert(putArgNode->gtRsvdRegs != RBM_NONE); size_t slots = size / XMM_REGSIZE_BYTES; assert(putArgNode->gtGetOp1()->isContained()); @@ -3578,7 +3572,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG - bool isRepMovspUsed = false; assert(dstAddr->isUsedFromReg()); @@ -3629,13 +3622,9 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) { -#ifdef DEBUG // If the destination of the CpObj is on the stack, make sure we allocated // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively). 
assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - regNumber tmpReg = REG_RCX; - isRepMovspUsed = true; -#endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); instGen(INS_r_movsp); @@ -3685,13 +3674,10 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) } else { -#ifdef DEBUG // Otherwise, we can save code-size and improve CQ by emitting // rep movsp (alias for movsd/movsq for x86/x64) assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); - regNumber tmpReg = REG_RCX; - isRepMovspUsed = true; -#endif // DEBUG + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); instGen(INS_r_movsp); } @@ -3767,7 +3753,7 @@ void CodeGen::genTableBasedSwitch(GenTree* treeNode) regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum; regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum; - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // load the ip-relative offset (which is relative to start of fgFirstBB) getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0); @@ -4057,8 +4043,7 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) offsetReg = genConsumeReg(offsetNode); // We will use a temp register for the offset*scale+effectiveIndex computation. - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - tmpReg = genRegNumFromMask(tmpRegMask); + tmpReg = arrOffset->GetSingleTempReg(); } else { @@ -5006,9 +4991,10 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } #if defined(_TARGET_X86_) - bool fCallerPop = (call->gtFlags & GTF_CALL_POP_ARGS) != 0; + bool fCallerPop = call->CallerPop(); #ifdef UNIX_X86_ABI + if (!call->IsUnmanaged()) { CorInfoCallConv callConv = CORINFO_CALLCONV_DEFAULT; @@ -6371,7 +6357,7 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) #if !defined(_TARGET_64BIT_) //------------------------------------------------------------------------ -// genIntToIntCast: Generate code for long to int casts on x86. +// genLongToIntCast: Generate code for long to int casts on x86. // // Arguments: // cast - The GT_CAST node @@ -6481,14 +6467,15 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) GenTreePtr castOp = treeNode->gtCast.CastOp(); var_types srcType = genActualType(castOp->TypeGet()); + noway_assert(genTypeSize(srcType) >= 4); -#if !defined(_TARGET_64BIT_) +#ifdef _TARGET_X86_ if (varTypeIsLong(srcType)) { genLongToIntCast(treeNode); return; } -#endif // !defined(_TARGET_64BIT_) +#endif // _TARGET_X86_ regNumber targetReg = treeNode->gtRegNum; regNumber sourceReg = castOp->gtRegNum; @@ -6504,18 +6491,17 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) } bool requiresOverflowCheck = false; - bool needAndAfter = false; assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); - instruction ins = INS_invalid; - emitAttr size = EA_UNKNOWN; + instruction ins = INS_invalid; + emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); + emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); - if (genTypeSize(srcType) < genTypeSize(dstType)) + if (srcSize < dstSize) { // Widening cast - // Is this an Overflow checking cast? // We only need to handle one case, as the other casts can never overflow. 
// cast from TYP_INT to TYP_ULONG @@ -6523,14 +6509,11 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG)) { requiresOverflowCheck = true; - size = EA_ATTR(genTypeSize(srcType)); ins = INS_mov; } else { - // we need the source size - size = EA_ATTR(genTypeSize(srcType)); - noway_assert(size < EA_PTRSIZE); + noway_assert(srcSize < EA_PTRSIZE); ins = ins_Move_Extend(srcType, castOp->InReg()); @@ -6540,7 +6523,7 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existant movzxd), but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode. */ - if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE)) + if (!isUnsignedSrc && !isUnsignedDst) { #ifdef _TARGET_X86_ NYI_X86("Cast to 64 bit for x86/RyuJIT"); @@ -6548,36 +6531,22 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) ins = INS_movsxd; #endif // !_TARGET_X86_ } - - /* - Special case: for a cast of byte to char we first - have to expand the byte (w/ sign extension), then - mask off the high bits. - Use 'movsx' followed by 'and' - */ - if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE)) - { - noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE); - needAndAfter = true; - } } } else { // Narrowing cast, or sign-changing cast - noway_assert(genTypeSize(srcType) >= genTypeSize(dstType)); + noway_assert(srcSize >= dstSize); // Is this an Overflow checking cast? if (treeNode->gtOverflow()) { requiresOverflowCheck = true; - size = EA_ATTR(genTypeSize(srcType)); ins = INS_mov; } else { - size = EA_ATTR(genTypeSize(dstType)); - ins = ins_Move_Extend(dstType, castOp->InReg()); + ins = ins_Move_Extend(dstType, castOp->InReg()); } } @@ -6659,36 +6628,26 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) if (signCheckOnly) { // We only need to check for a negative value in sourceReg - inst_RV_IV(INS_cmp, sourceReg, 0, size); + inst_RV_IV(INS_cmp, sourceReg, 0, srcSize); genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); } else { - regNumber tmpReg = REG_NA; - - if (needScratchReg) - { - // We need an additional temp register - // Make sure we have exactly one allocated. 
- assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); - } - // When we are converting from unsigned or to unsigned, we // will only have to check for any bits set using 'typeMask' if (isUnsignedSrc || isUnsignedDst) { if (needScratchReg) { + regNumber tmpReg = treeNode->GetSingleTempReg(); inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg - inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits - genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Thow if result shift is non-zero + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, srcSize, tmpReg, 32); // Shift right by 32 bits + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if result shift is non-zero } else { noway_assert(typeMask != 0); - inst_RV_IV(INS_TEST, sourceReg, typeMask, size); + inst_RV_IV(INS_TEST, sourceReg, typeMask, srcSize); genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); } } @@ -6702,12 +6661,12 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) noway_assert((typeMin != 0) && (typeMax != 0)); - inst_RV_IV(INS_cmp, sourceReg, typeMax, size); + inst_RV_IV(INS_cmp, sourceReg, typeMax, srcSize); genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW); // Compare with the MIN - inst_RV_IV(INS_cmp, sourceReg, typeMin, size); + inst_RV_IV(INS_cmp, sourceReg, typeMin, srcSize); genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW); } } @@ -6717,15 +6676,13 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) // On amd64, we can hit this path for a same-register // 4-byte to 8-byte widening conversion, and need to // emit the instruction to set the high bits correctly. - || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE) + || (dstSize == EA_8BYTE && srcSize == EA_4BYTE) #endif // _TARGET_AMD64_ ) - inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize); } else // non-overflow checking cast { - noway_assert(size < EA_PTRSIZE || srcType == dstType); - // We may have code transformations that result in casts where srcType is the same as dstType. // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a // long constant to a long lclVar. @@ -6734,7 +6691,7 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) ins = INS_mov; } /* Is the value sitting in a non-byte-addressable register? 
*/ - else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg)) + else if (castOp->InReg() && (dstSize == EA_1BYTE) && !isByteReg(sourceReg)) { if (isUnsignedDst) { @@ -6750,21 +6707,21 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) /* Generate "mov targetReg, castOp->gtReg */ if (targetReg != sourceReg) { - inst_RV_RV(INS_mov, targetReg, sourceReg, srcType); + inst_RV_RV(INS_mov, targetReg, sourceReg, srcType, srcSize); } } if (ins == INS_AND) { - noway_assert((needAndAfter == false) && isUnsignedDst); + noway_assert(isUnsignedDst); /* Generate "and reg, MASK */ unsigned fillPattern; - if (size == EA_1BYTE) + if (dstSize == EA_1BYTE) { fillPattern = 0xff; } - else if (size == EA_2BYTE) + else if (dstSize == EA_2BYTE) { fillPattern = 0xffff; } @@ -6778,37 +6735,29 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) #ifdef _TARGET_AMD64_ else if (ins == INS_movsxd) { - noway_assert(!needAndAfter); - inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize); } #endif // _TARGET_AMD64_ else if (ins == INS_mov) { - noway_assert(!needAndAfter); if (targetReg != sourceReg #ifdef _TARGET_AMD64_ // On amd64, 'mov' is the opcode used to zero-extend from // 4 bytes to 8 bytes. - || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE) + || (dstSize == EA_8BYTE && srcSize == EA_4BYTE) #endif // _TARGET_AMD64_ ) { - inst_RV_RV(ins, targetReg, sourceReg, srcType, size); + inst_RV_RV(ins, targetReg, sourceReg, srcType, srcSize); } } else { noway_assert(ins == INS_movsx || ins == INS_movzx); + noway_assert(srcSize >= dstSize); /* Generate "mov targetReg, castOp->gtReg */ - inst_RV_RV(ins, targetReg, sourceReg, srcType, size); - - /* Mask off high bits for cast from byte to char */ - if (needAndAfter) - { - noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx); - inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE); - } + inst_RV_RV(ins, targetReg, sourceReg, srcType, dstSize); } } @@ -7089,9 +7038,7 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) regNumber targetReg = treeNode->gtRegNum; // Extract exponent into a register. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genConsumeReg(op1); @@ -7397,10 +7344,7 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) } // We need an additional register for bitmask. - // Make sure we have one allocated. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // Move operand into targetReg only if the reg reserved for // internal purpose is not the same as targetReg. 
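Every GetSingleTempReg()/ExtractTempReg() call introduced in the hunks above rests on the same invariant: LSRA records the temporaries it reserved for a node as set bits in gtRsvdRegs, so "exactly one temp available" means "exactly one bit set in the masked set", and the register number is simply the index of that bit. As a minimal standalone model of the mask arithmetic, with a plain uint64_t standing in for regMaskTP and longhand loops in place of the JIT's genCountBits/genFindLowestBit/genRegNumFromMask (the helper names below are illustrative, not the JIT's):

#include <cassert>
#include <cstdint>

typedef uint64_t regMask; // stand-in for regMaskTP; bit N represents register number N

unsigned maskPopCount(regMask m) // models genCountBits
{
    unsigned n = 0;
    for (; m != 0; m &= (m - 1)) // each pass clears the lowest set bit
    {
        n++;
    }
    return n;
}

unsigned regFromSingleBitMask(regMask m) // models genRegNumFromMask
{
    assert(maskPopCount(m) == 1); // the GetSingleTempReg invariant
    unsigned reg = 0;
    for (; (m & 1) == 0; m >>= 1)
    {
        reg++;
    }
    return reg;
}

unsigned extractLowestReg(regMask& set) // models the ExtractTempReg pattern
{
    assert(set != 0);
    regMask lowBit = set & (regMask(0) - set); // x & -x isolates the lowest set bit
    set &= ~lowBit;                            // remove it so it cannot be handed out twice
    return regFromSingleBitMask(lowBit);
}

Under that model, GetSingleTempReg(mask) amounts to regFromSingleBitMask(gtRsvdRegs & mask), and ExtractTempReg(mask) to extractLowestReg on the masked set: exactly the bookkeeping the old call sites spelled out by hand at each use.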
@@ -7716,17 +7660,17 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) unsigned prevFieldOffset = currentOffset; regNumber intTmpReg = REG_NA; regNumber simdTmpReg = REG_NA; - if (putArgStk->gtRsvdRegs != RBM_NONE) + if (putArgStk->AvailableTempRegCount() != 0) { regMaskTP rsvdRegs = putArgStk->gtRsvdRegs; if ((rsvdRegs & RBM_ALLINT) != 0) { - intTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLINT); + intTmpReg = putArgStk->GetSingleTempReg(RBM_ALLINT); assert(genIsValidIntReg(intTmpReg)); } if ((rsvdRegs & RBM_ALLFLOAT) != 0) { - simdTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLFLOAT); + simdTmpReg = putArgStk->GetSingleTempReg(RBM_ALLFLOAT); assert(genIsValidFloatReg(simdTmpReg)); } assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1)); diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index 14b2abafb6..1c24b93abe 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -1134,6 +1134,170 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, return useType; } +/////////////////////////////////////////////////////////////////////////////// +// +// MEASURE_NOWAY: code to measure and rank dynamic occurrences of noway_assert. +// (Just the appearances of noway_assert, whether the assert is true or false.) +// This might help characterize the cost of noway_assert in non-DEBUG builds, +// or determine which noway_assert should be simple DEBUG-only asserts. +// +/////////////////////////////////////////////////////////////////////////////// + +#if MEASURE_NOWAY + +struct FileLine +{ + char* m_file; + unsigned m_line; + char* m_condStr; + + FileLine() : m_file(nullptr), m_line(0), m_condStr(nullptr) + { + } + + FileLine(const char* file, unsigned line, const char* condStr) : m_line(line) + { + size_t newSize = (strlen(file) + 1) * sizeof(char); + m_file = (char*)HostAllocator::getHostAllocator()->Alloc(newSize); + strcpy_s(m_file, newSize, file); + + newSize = (strlen(condStr) + 1) * sizeof(char); + m_condStr = (char*)HostAllocator::getHostAllocator()->Alloc(newSize); + strcpy_s(m_condStr, newSize, condStr); + } + + FileLine(const FileLine& other) + { + m_file = other.m_file; + m_line = other.m_line; + m_condStr = other.m_condStr; + } + + // GetHashCode() and Equals() are needed by SimplerHashTable + + static unsigned GetHashCode(FileLine fl) + { + assert(fl.m_file != nullptr); + unsigned code = fl.m_line; + for (const char* p = fl.m_file; *p != '\0'; p++) + { + code += *p; + } + // Could also add condStr.
+ return code; + } + + static bool Equals(FileLine fl1, FileLine fl2) + { + return (fl1.m_line == fl2.m_line) && (0 == strcmp(fl1.m_file, fl2.m_file)); + } +}; + +typedef SimplerHashTable<FileLine, FileLine, size_t, JitSimplerHashBehavior> FileLineToCountMap; +FileLineToCountMap* NowayAssertMap; + +void Compiler::RecordNowayAssert(const char* filename, unsigned line, const char* condStr) +{ + if (NowayAssertMap == nullptr) + { + NowayAssertMap = new (HostAllocator::getHostAllocator()) FileLineToCountMap(HostAllocator::getHostAllocator()); + } + FileLine fl(filename, line, condStr); + size_t* pCount = NowayAssertMap->LookupPointer(fl); + if (pCount == nullptr) + { + NowayAssertMap->Set(fl, 1); + } + else + { + ++(*pCount); + } +} + +void RecordNowayAssertGlobal(const char* filename, unsigned line, const char* condStr) +{ + if ((JitConfig.JitMeasureNowayAssert() == 1) && (JitTls::GetCompiler() != nullptr)) + { + JitTls::GetCompiler()->RecordNowayAssert(filename, line, condStr); + } +} + +struct NowayAssertCountMap +{ + size_t count; + FileLine fl; + + NowayAssertCountMap() : count(0) + { + } + + static int __cdecl compare(const void* elem1, const void* elem2) + { + NowayAssertCountMap* e1 = (NowayAssertCountMap*)elem1; + NowayAssertCountMap* e2 = (NowayAssertCountMap*)elem2; + return (int)((ssize_t)e2->count - (ssize_t)e1->count); // sort in descending order + } +}; + +void DisplayNowayAssertMap() +{ + if (NowayAssertMap != nullptr) + { + FILE* fout; + + LPCWSTR strJitMeasureNowayAssertFile = JitConfig.JitMeasureNowayAssertFile(); + if (strJitMeasureNowayAssertFile != nullptr) + { + fout = _wfopen(strJitMeasureNowayAssertFile, W("a")); + if (fout == nullptr) + { + fprintf(jitstdout, "Failed to open JitMeasureNowayAssertFile \"%ws\"\n", strJitMeasureNowayAssertFile); + return; + } + } + else + { + fout = jitstdout; + } + + // Iterate noway assert map, create sorted table by occurrence, dump it. + unsigned count = NowayAssertMap->GetCount(); + NowayAssertCountMap* nacp = new NowayAssertCountMap[count]; + unsigned i = 0; + + for (FileLineToCountMap::KeyIterator iter = NowayAssertMap->Begin(), end = NowayAssertMap->End(); + !iter.Equal(end); ++iter) + { + nacp[i].count = iter.GetValue(); + nacp[i].fl = iter.Get(); + ++i; + } + + qsort(nacp, count, sizeof(nacp[0]), NowayAssertCountMap::compare); + + if (fout == jitstdout) + { + // Don't output the header if writing to a file, since we'll be appending to existing dumps in that case. + fprintf(fout, "\nnoway_assert counts:\n"); + fprintf(fout, "count, file, line, text\n"); + } + + for (i = 0; i < count; i++) + { + fprintf(fout, "%u, %s, %u, \"%s\"\n", nacp[i].count, nacp[i].fl.m_file, nacp[i].fl.m_line, + nacp[i].fl.m_condStr); + } + + if (fout != jitstdout) + { + fclose(fout); + fout = nullptr; + } + } +} + +#endif // MEASURE_NOWAY + /***************************************************************************** * variables to keep track of how many iterations we go in a dataflow pass */ @@ -1222,6 +1386,10 @@ void Compiler::compShutdown() } #endif // ALT_JIT +#if MEASURE_NOWAY + DisplayNowayAssertMap(); +#endif // MEASURE_NOWAY + ArenaAllocator::shutdown(); /* Shut down the emitter */ @@ -2414,6 +2582,7 @@ bool Compiler::compShouldThrowOnNoway( #ifdef FEATURE_TRACELOGGING compJitTelemetry.NotifyNowayAssert(filename, line); #endif + // In min opts, we don't want the noway assert to go through the exception // path. Instead we want it to just silently go through codegen for // compat reasons. 
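DisplayNowayAssertMap() above boils down to "count hits per (file, line) site, then sort descending and print". For readers who want the scheme without the JIT plumbing, here is a rough standalone equivalent, using STL containers in place of SimplerHashTable and the host allocator (all names below are illustrative, not part of the JIT):

#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::pair<std::string, unsigned> SiteKey; // (file, line), like FileLine minus condStr

static std::map<SiteKey, size_t> g_nowayCounts;

void recordNoway(const char* file, unsigned line) // models Compiler::RecordNowayAssert
{
    ++g_nowayCounts[SiteKey(file, line)]; // a new entry starts at zero, so the first hit stores 1
}

void dumpNowayCounts() // models DisplayNowayAssertMap, minus the optional output file
{
    std::vector<std::pair<SiteKey, size_t>> rows(g_nowayCounts.begin(), g_nowayCounts.end());

    // Sort by dynamic hit count, descending, like NowayAssertCountMap::compare.
    std::sort(rows.begin(), rows.end(),
              [](const std::pair<SiteKey, size_t>& a, const std::pair<SiteKey, size_t>& b) {
                  return a.second > b.second;
              });

    std::printf("count, file, line\n");
    for (size_t i = 0; i < rows.size(); i++)
    {
        std::printf("%zu, %s, %u\n", rows[i].second, rows[i].first.first.c_str(), rows[i].first.second);
    }
}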
diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 9ca0e1a3e1..998b647702 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1218,13 +1218,13 @@ struct fgArgTabEntry regNumber otherRegNum; // The (second) register to use when passing this argument. SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; -#elif defined(_TARGET_X86_) +#elif !defined(_TARGET_64BIT_) __declspec(property(get = getIsStruct)) bool isStruct; bool getIsStruct() { return varTypeIsStruct(node); } -#endif // _TARGET_X86_ +#endif // !_TARGET_64BIT_ #ifdef _TARGET_ARM_ void SetIsHfaRegArg(bool hfaRegArg) @@ -2875,13 +2875,10 @@ protected: bool impILConsumesAddr(const BYTE* codeAddr, CORINFO_METHOD_HANDLE fncHandle, CORINFO_MODULE_HANDLE scpHandle); void impResolveToken(const BYTE* addr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CorInfoTokenKind kind); - void impPushOnStackNoType(GenTreePtr tree); void impPushOnStack(GenTreePtr tree, typeInfo ti); - void impPushNullObjRefOnStack(); - StackEntry impPopStack(); - StackEntry impPopStack(CORINFO_CLASS_HANDLE& structTypeRet); - GenTreePtr impPopStack(typeInfo& ti); + void impPushNullObjRefOnStack(); + StackEntry impPopStack(); StackEntry& impStackTop(unsigned n = 0); unsigned impStackHeight(); @@ -9074,6 +9071,10 @@ public: // Is the compilation in a full trust context? bool compIsFullTrust(); +#if MEASURE_NOWAY + void RecordNowayAssert(const char* filename, unsigned line, const char* condStr); +#endif // MEASURE_NOWAY + #ifndef FEATURE_TRACELOGGING // Should we actually fire the noway assert body and the exception handler? bool compShouldThrowOnNoway(); @@ -9475,6 +9476,7 @@ const instruction INS_XOR = INS_eor; const instruction INS_NEG = INS_rsb; const instruction INS_TEST = INS_tst; const instruction INS_MUL = INS_mul; +const instruction INS_MULADD = INS_mla; const instruction INS_SIGNED_DIVIDE = INS_sdiv; const instruction INS_UNSIGNED_DIVIDE = INS_udiv; const instruction INS_BREAKPOINT = INS_bkpt; @@ -9500,6 +9502,7 @@ const instruction INS_XOR = INS_eor; const instruction INS_NEG = INS_neg; const instruction INS_TEST = INS_tst; const instruction INS_MUL = INS_mul; +const instruction INS_MULADD = INS_madd; const instruction INS_SIGNED_DIVIDE = INS_sdiv; const instruction INS_UNSIGNED_DIVIDE = INS_udiv; const instruction INS_BREAKPOINT = INS_bkpt; diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 88c082d499..959db68a94 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -4686,141 +4686,6 @@ inline char* regMaskIntToString(regMaskTP mask, Compiler* context) #endif // DEBUG -inline void BasicBlock::InitVarSets(Compiler* comp) -{ - VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp)); - VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp)); - VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp)); - VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp)); - VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp)); - - bbMemoryUse = emptyMemoryKindSet; - bbMemoryDef = emptyMemoryKindSet; - bbMemoryLiveIn = emptyMemoryKindSet; - bbMemoryLiveOut = emptyMemoryKindSet; -} - -// Returns true if the basic block ends with GT_JMP -inline bool BasicBlock::endsWithJmpMethod(Compiler* comp) -{ - if (comp->compJmpOpUsed && (bbJumpKind == BBJ_RETURN) && (bbFlags & BBF_HAS_JMP)) - { - GenTree* lastNode = this->lastNode(); - assert(lastNode != nullptr); - return lastNode->OperGet() == GT_JMP; - } - - return false; -} - -// Returns true if the basic block ends with either -// i) 
GT_JMP or -// ii) tail call (implicit or explicit) -// -// Params: -// comp - Compiler instance -// fastTailCallsOnly - Only consider fast tail calls excluding tail calls via helper. -inline bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/) -{ - GenTreePtr tailCall = nullptr; - bool tailCallsConvertibleToLoopOnly = false; - return endsWithJmpMethod(comp) || - endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall); -} - -//------------------------------------------------------------------------------ -// endsWithTailCall : Check if the block ends with a tail call. -// -// Arguments: -// comp - compiler instance -// fastTailCallsOnly - check for fast tail calls only -// tailCallsConvertibleToLoopOnly - check for tail calls convertible to loop only -// tailCall - a pointer to a tree that will be set to the call tree if the block -// ends with a tail call and will be set to nullptr otherwise. -// -// Return Value: -// true if the block ends with a tail call; false otherwise. -// -// Notes: -// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true. - -inline bool BasicBlock::endsWithTailCall(Compiler* comp, - bool fastTailCallsOnly, - bool tailCallsConvertibleToLoopOnly, - GenTree** tailCall) -{ - assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly); - *tailCall = nullptr; - bool result = false; - - // Is this a tail call? - // The reason for keeping this under RyuJIT is so as not to impact existing Jit32 x86 and arm - // targets. - if (comp->compTailCallUsed) - { - if (fastTailCallsOnly || tailCallsConvertibleToLoopOnly) - { - // Only fast tail calls or only tail calls convertible to loops - result = (bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN); - } - else - { - // Fast tail calls, tail calls convertible to loops, and tails calls dispatched via helper - result = (bbJumpKind == BBJ_THROW) || ((bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN)); - } - - if (result) - { - GenTree* lastNode = this->lastNode(); - if (lastNode->OperGet() == GT_CALL) - { - GenTreeCall* call = lastNode->AsCall(); - if (tailCallsConvertibleToLoopOnly) - { - result = call->IsTailCallConvertibleToLoop(); - } - else if (fastTailCallsOnly) - { - result = call->IsFastTailCall(); - } - else - { - result = call->IsTailCall(); - } - - if (result) - { - *tailCall = call; - } - } - else - { - result = false; - } - } - } - - return result; -} - -//------------------------------------------------------------------------------ -// endsWithTailCallConvertibleToLoop : Check if the block ends with a tail call convertible to loop. -// -// Arguments: -// comp - compiler instance -// tailCall - a pointer to a tree that will be set to the call tree if the block -// ends with a tail call convertible to loop and will be set to nullptr otherwise. -// -// Return Value: -// true if the block ends with a tail call convertible to loop. 
- -inline bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall) -{ - bool fastTailCallsOnly = false; - bool tailCallsConvertibleToLoopOnly = true; - return endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, tailCall); -} - inline static bool StructHasOverlappingFields(DWORD attribs) { return ((attribs & CORINFO_FLG_OVERLAPPING_FIELDS) != 0); diff --git a/src/jit/dataflow.h b/src/jit/dataflow.h index c9803a0cc1..615a5d1494 100644 --- a/src/jit/dataflow.h +++ b/src/jit/dataflow.h @@ -70,11 +70,9 @@ void DataFlow::ForwardAnalysis(TCallback& callback) if (callback.EndMerge(block)) { - AllSuccessorIter succsBegin = block->GetAllSuccs(m_pCompiler).begin(); - AllSuccessorIter succsEnd = block->GetAllSuccs(m_pCompiler).end(); - for (AllSuccessorIter succ = succsBegin; succ != succsEnd; ++succ) + for (BasicBlock* succ : block->GetAllSuccs(m_pCompiler)) { - worklist.insert(worklist.end(), *succ); + worklist.insert(worklist.end(), succ); } } } diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index 53ee88b3a2..2b8eb25bc2 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -7568,9 +7568,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (offset != 0) { - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); if (emitIns_valid_imm_for_add(offset, INS_FLAGS_DONT_CARE)) { @@ -7632,9 +7630,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); // First load/store tmpReg with the large offset constant codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -7684,8 +7680,6 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) { - regNumber result = REG_NA; - // dst can only be a reg assert(!dst->isContained()); @@ -7732,120 +7726,85 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(!src1->isContained()); } } - bool isMulOverflow = false; - bool isUnsignedMul = false; - regNumber extraReg = REG_NA; + + insFlags flags = INS_FLAGS_DONT_CARE; + bool isMulOverflow = false; if (dst->gtOverflowEx()) { - NYI_ARM("emitInsTernary overflow"); -#if 0 - if (ins == INS_add) - { - ins = INS_adds; - } - else if (ins == INS_sub) + if ((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc)) { - ins = INS_subs; + flags = INS_FLAGS_SET; } else if (ins == INS_mul) { isMulOverflow = true; - isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); assert(intConst == nullptr); // overflow format doesn't support an int constant operand } else { assert(!"Invalid ins for overflow check"); } -#endif } if (intConst != nullptr) { - emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue()); + emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue(), flags); } else { if (isMulOverflow) { - NYI_ARM("emitInsTernary overflow"); -#if 0 - // Make sure that we have an internal register - assert(genCountBits(dst->gtRsvdRegs) == 2); + regNumber extraReg = dst->GetSingleTempReg(); + assert(extraReg != 
dst->gtRegNum); - // There will be two bits set in tmpRegsMask. - // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' - regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); - assert(tmpRegsMask != RBM_NONE); - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask - - if (isUnsignedMul) + if ((dst->gtFlags & GTF_UNSIGNED) != 0) { - if (attr == EA_4BYTE) - { - // Compute 8 byte results from 4 byte by 4 byte multiplication. - emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + // Compute 8 byte result from 4 byte by 4 byte multiplication. + emitIns_R_R_R_R(INS_umull, EA_4BYTE, dst->gtRegNum, extraReg, src1->gtRegNum, src2->gtRegNum); - // Get the high result by shifting dst. - emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); - } - else - { - assert(attr == EA_8BYTE); - // Compute the high result. - emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); - - // Now multiply without skewing the high result. - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - } - - // zero-sign bit comparision to detect overflow. + // Overflow exists if the result's high word is non-zero. emitIns_R_I(INS_cmp, attr, extraReg, 0); } else { - int bitShift = 0; - if (attr == EA_4BYTE) - { - // Compute 8 byte results from 4 byte by 4 byte multiplication. - emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - - // Get the high result by shifting dst. - emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32); - - bitShift = 31; - } - else - { - assert(attr == EA_8BYTE); - // Save the high result in a temporary register. - emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum); - - // Now multiply without skewing the high result. - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); - - bitShift = 63; - } + // Compute 8 byte result from 4 byte by 4 byte multiplication. + emitIns_R_R_R_R(INS_smull, EA_4BYTE, dst->gtRegNum, extraReg, src1->gtRegNum, src2->gtRegNum); - // Sign bit comparision to detect overflow. - emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); + // Overflow exists if the result's high word is not merely a sign bit. + emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, 31, INS_FLAGS_DONT_CARE, INS_OPTS_ASR); } -#endif } else { - // We can just multiply. - emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); + // We can just do the arithmetic, setting the flags if needed. + emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum, flags); } } if (dst->gtOverflowEx()) { - NYI_ARM("emitInsTernary overflow"); -#if 0 assert(!varTypeIsFloating(dst)); - codeGen->genCheckOverflow(dst); -#endif + + emitJumpKind jumpKind; + + if (dst->OperGet() == GT_MUL) + { + jumpKind = EJ_ne; + } + else + { + bool isUnsignedOverflow = ((dst->gtFlags & GTF_UNSIGNED) != 0); + jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs; + if (jumpKind == EJ_lo) + { + if ((dst->OperGet() != GT_SUB) && (dst->OperGet() != GT_ASG_SUB) && (dst->OperGet() != GT_SUB_HI)) + { + jumpKind = EJ_hs; + } + } + } + + // Jump to the block which will throw the exception. 
+ codeGen->genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW); } return dst->gtRegNum; diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 93994e7918..0328cb6712 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -10809,9 +10809,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (offset != 0) { - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); emitAttr addType = varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE; @@ -10833,7 +10831,6 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR // Then load/store dataReg from/to [tmpReg + offset] emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset); - ; } else // large offset { @@ -10874,9 +10871,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR else { // We require a tmpReg to hold the offset - regMaskTP tmpRegMask = indir->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - noway_assert(tmpReg != REG_NA); + regNumber tmpReg = indir->GetSingleTempReg(); // First load/store tmpReg with the large offset constant codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); @@ -11047,9 +11042,8 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, assert(!src1->isContained()); } } - bool isMulOverflow = false; - bool isUnsignedMul = false; - regNumber extraReg = REG_NA; + + bool isMulOverflow = false; if (dst->gtOverflowEx()) { if (ins == INS_add) @@ -11063,7 +11057,6 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, else if (ins == INS_mul) { isMulOverflow = true; - isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0); assert(intConst == nullptr); // overflow format doesn't support an int constant operand } else @@ -11079,17 +11072,10 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (isMulOverflow) { - // Make sure that we have an internal register - assert(genCountBits(dst->gtRsvdRegs) == 2); - - // There will be two bits set in tmpRegsMask. - // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask' - regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum); - assert(tmpRegsMask != RBM_NONE); - regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask - extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask + regNumber extraReg = dst->GetSingleTempReg(); + assert(extraReg != dst->gtRegNum); - if (isUnsignedMul) + if ((dst->gtFlags & GTF_UNSIGNED) != 0) { if (attr == EA_4BYTE) { @@ -11109,7 +11095,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum); } - // zero-sign bit comparision to detect overflow. + // zero-sign bit comparison to detect overflow. emitIns_R_I(INS_cmp, attr, extraReg, 0); } else @@ -11137,7 +11123,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, bitShift = 63; } - // Sign bit comparision to detect overflow. + // Sign bit comparison to detect overflow. 
emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR); } } diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index b495d015d6..760813004c 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -332,16 +332,9 @@ bool IsXMMReg(regNumber reg) unsigned RegEncoding(regNumber reg) { #ifndef LEGACY_BACKEND - // XMM registers do not share the same reg numbers as integer registers. - // But register encoding of integer and XMM registers is the same. - // Therefore, subtract XMMBASE from regNumber to get the register encoding - // in case of XMM registers. - return (unsigned)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7); -#else // LEGACY_BACKEND - // Legacy X86: XMM registers share the same reg numbers as integer registers and - // hence nothing to do to get reg encoding. + static_assert((REG_XMM0 & 0x7) == 0, "bad XMMBASE"); +#endif return (unsigned)(reg & 0x7); -#endif // LEGACY_BACKEND } // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes diff --git a/src/jit/error.h b/src/jit/error.h index 0535601055..78f24adb38 100644 --- a/src/jit/error.h +++ b/src/jit/error.h @@ -80,7 +80,25 @@ extern void noWayAssertBodyConditional( ); extern void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line); +// Define MEASURE_NOWAY to 1 to enable code to count and rank individual noway_assert calls by occurrence. +// These asserts would be dynamically executed, but not necessarily fail. This provides some insight into +// the dynamic prevalence of these (if not a direct measure of their cost), which exist in non-DEBUG as +// well as DEBUG builds. #ifdef DEBUG +#define MEASURE_NOWAY 1 +#else // !DEBUG +#define MEASURE_NOWAY 0 +#endif // !DEBUG + +#if MEASURE_NOWAY +extern void RecordNowayAssertGlobal(const char* filename, unsigned line, const char* condStr); +#define RECORD_NOWAY_ASSERT(condStr) RecordNowayAssertGlobal(__FILE__, __LINE__, condStr); +#else +#define RECORD_NOWAY_ASSERT(condStr) +#endif + +#ifdef DEBUG + #define NO_WAY(msg) (debugError(msg, __FILE__, __LINE__), noWay()) // Used for fallback stress mode #define NO_WAY_NOASSERT(msg) noWay() @@ -90,6 +108,7 @@ extern void noWayAssertBodyConditional(const char* cond, const char* file, unsig #define noway_assert(cond) \ do \ { \ + RECORD_NOWAY_ASSERT(#cond) \ if (!(cond)) \ { \ noWayAssertBodyConditional(#cond, __FILE__, __LINE__); \ @@ -99,7 +118,7 @@ extern void noWayAssertBodyConditional(const char* cond, const char* file, unsig #define NOWAY_MSG(msg) noWayAssertBodyConditional(msg, __FILE__, __LINE__) -#else +#else // !DEBUG #define NO_WAY(msg) noWay() #define BADCODE(msg) badCode() @@ -114,6 +133,7 @@ extern void noWayAssertBodyConditional(const char* cond, const char* file, unsig #define noway_assert(cond) \ do \ { \ + RECORD_NOWAY_ASSERT(#cond) \ if (!(cond)) \ { \ noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS); \ @@ -123,7 +143,7 @@ extern void noWayAssertBodyConditional(const char* cond, const char* file, unsig #define NOWAY_MSG(msg) noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS) -#endif +#endif // !DEBUG // IMPL_LIMITATION is called when we encounter valid IL that is not // supported by our current implementation because of various diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 3374b8c820..0c57862768 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -3189,9 +3189,15 @@ void Compiler::fgComputePreds() if (ehDsc->HasFilter()) { ehDsc->ebdFilter->bbFlags |= BBF_JMP_TARGET |
BBF_HAS_LABEL; + + // The first block of a filter has an artificial extra refcount. + ehDsc->ebdFilter->bbRefs++; } ehDsc->ebdHndBeg->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; + + // The first block of a handler has an artificial extra refcount. + ehDsc->ebdHndBeg->bbRefs++; } fgModified = false; @@ -5455,15 +5461,15 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, B if (!impIsTailCallILPattern(tailCall, opcode, codeAddr + sz, codeEndp, isRecursive, &isCallPopAndRet)) { -#ifdef _TARGET_AMD64_ +#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) BADCODE3("tail call not followed by ret or pop+ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp)); #else BADCODE3("tail call not followed by ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp)); -#endif //_TARGET_AMD64_ +#endif // !FEATURE_CORECLR && _TARGET_AMD64_ } -#ifdef _TARGET_AMD64_ +#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) if (isCallPopAndRet) { // By breaking here, we let pop and ret opcodes to be @@ -5472,7 +5478,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, B // in fgMorphCall(). break; } -#endif //_TARGET_AMD64_ +#endif // !FEATURE_CORECLR && _TARGET_AMD64_ } else { @@ -9124,7 +9130,7 @@ void Compiler::fgSimpleLowering() else { con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL); - con->gtRsvdRegs = 0; + con->gtRsvdRegs = RBM_NONE; add = gtNewOperNode(GT_ADD, TYP_REF, arr, con); add->gtRsvdRegs = arr->gtRsvdRegs; @@ -12081,12 +12087,6 @@ void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block) fgExtendEHRegionBefore(block); // Update the EH table to make the prolog block the first block in the block's EH // block. - // fgExtendEHRegionBefore mucks with the bbRefs without updating the pred list, which we will - // do below for this block. So, undo that change. - assert(newHead->bbRefs > 0); - newHead->bbRefs--; - block->bbRefs++; - // Distribute the pred list between newHead and block. Incoming edges coming from outside // the handler go to the prolog. Edges coming from within the handler are back-edges, and // go to the existing 'block'.
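The next hunk (fgMightHaveLoop) repeats the range-based-for cleanup already made in dataflow.h above: the explicit AllSuccessorIter begin/end pair disappears because GetAllSuccs returns an object that can be iterated directly. As a sketch of the minimal shape such a successor range needs (a toy only; the JIT's real iterator also walks EH successors, which this omits):

#include <vector>

struct Block; // pointers only, no definition needed for this sketch

// Anything exposing begin()/end() that return iterators works in a range-based for;
// raw pointers into an array already satisfy the iterator requirements.
struct SuccRange
{
    Block** m_first;
    Block** m_last;

    Block** begin() const
    {
        return m_first;
    }
    Block** end() const
    {
        return m_last;
    }
};

// Equivalent of the rewritten loop bodies: no iterator bookkeeping at the call site.
void appendSuccs(SuccRange succs, std::vector<Block*>& worklist)
{
    for (Block* succ : succs)
    {
        worklist.push_back(succ);
    }
}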
@@ -12618,10 +12618,8 @@ bool Compiler::fgMightHaveLoop() { BitVecOps::AddElemD(&blockVecTraits, blocksSeen, block->bbNum); - AllSuccessorIter succsEnd = block->GetAllSuccs(this).end(); - for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs) + for (BasicBlock* succ : block->GetAllSuccs(this)) { - BasicBlock* succ = (*succs); if (BitVecOps::IsMember(&blockVecTraits, blocksSeen, succ->bbNum)) { return true; @@ -16569,6 +16567,7 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block) #endif // DEBUG HBtab->ebdTryBeg = bPrev; bPrev->bbFlags |= BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_HAS_LABEL; + // clear the TryBeg flag unless it begins another try region if (!bbIsTryBeg(block)) { @@ -16591,6 +16590,16 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block) HBtab->ebdHndBeg = bPrev; bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL; + +#if FEATURE_EH_FUNCLETS + if (fgFuncletsCreated) + { + assert((block->bbFlags & BBF_FUNCLET_BEG) != 0); + bPrev->bbFlags |= BBF_FUNCLET_BEG; + block->bbFlags &= ~BBF_FUNCLET_BEG; + } +#endif // FEATURE_EH_FUNCLETS + bPrev->bbRefs++; // If this is a handler for a filter, the last block of the filter will end with @@ -16630,6 +16639,16 @@ void Compiler::fgExtendEHRegionBefore(BasicBlock* block) HBtab->ebdFilter = bPrev; bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL; + +#if FEATURE_EH_FUNCLETS + if (fgFuncletsCreated) + { + assert((block->bbFlags & BBF_FUNCLET_BEG) != 0); + bPrev->bbFlags |= BBF_FUNCLET_BEG; + block->bbFlags &= ~BBF_FUNCLET_BEG; + } +#endif // FEATURE_EH_FUNCLETS + bPrev->bbRefs++; } } @@ -17036,8 +17055,8 @@ bool Compiler::fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionInde // // Return Value: // A block with the desired characteristics, so the new block will be inserted after this one. -// If there is no suitable location, return nullptr. This should basically never happen. - +// If there is no suitable location, return nullptr. This should basically never happen except in the case of +// single-block filters. BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, bool putInTryRegion, BasicBlock* startBlk, @@ -17069,6 +17088,13 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, regionIndex, dspBool(putInTryRegion), startBlk->bbNum, (endBlk == nullptr) ? 0 : endBlk->bbNum, (nearBlk == nullptr) ? 0 : nearBlk->bbNum, (jumpBlk == nullptr) ? 0 : jumpBlk->bbNum, dspBool(runRarely)); + bool insertingIntoFilter = false; + if (!putInTryRegion) + { + EHblkDsc* const dsc = ehGetDsc(regionIndex - 1); + insertingIntoFilter = dsc->HasFilter() && (startBlk == dsc->ebdFilter) && (endBlk == dsc->ebdHndBeg); + } + bool reachedNear = false; // Have we reached 'nearBlk' in our search? If not, we'll keep searching. bool inFilter = false; // Are we in a filter region that we need to skip? BasicBlock* bestBlk = @@ -17110,9 +17136,7 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, { // Record the fact that we entered a filter region, so we don't insert into filters... // Unless the caller actually wanted the block inserted in this exact filter region. - // Detect this by the fact that startBlk and endBlk point to the filter begin and end. 
- if (putInTryRegion || (blk != startBlk) || (startBlk != ehGetDsc(regionIndex - 1)->ebdFilter) || - (endBlk != ehGetDsc(regionIndex - 1)->ebdHndBeg)) + if (!insertingIntoFilter || (blk != startBlk)) { inFilter = true; } @@ -17258,7 +17282,21 @@ BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex, bestBlk = goodBlk; } -DONE:; +DONE: + + // If we are inserting into a filter and the best block is the end of the filter region, we need to + // insert after its predecessor instead: the CLR ABI states that the terminal block of a filter region + // is its exit block. If the filter region consists of a single block, a new block cannot be inserted + // without either splitting the single block before inserting a new block or inserting the new block + // before the single block and updating the filter description such that the inserted block is marked + // as the entry block for the filter. This work must be done by the caller; this function returns + // `nullptr` to indicate this case. + if (insertingIntoFilter && (bestBlk == endBlk->bbPrev) && (bestBlk == startBlk)) + { + assert(bestBlk != nullptr); + assert(bestBlk->bbJumpKind == BBJ_EHFILTERRET); + bestBlk = nullptr; + } return bestBlk; } @@ -17437,6 +17475,21 @@ BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind, // Now find the insertion point. afterBlk = fgFindInsertPoint(regionIndex, putInTryRegion, startBlk, endBlk, nearBlk, nullptr, runRarely); + // If afterBlk is nullptr, we must be inserting into a single-block filter region. Because the CLR ABI requires + // that control exits a filter via the last instruction in the filter range, this situation requires logically + // splitting the single block. In practice, we simply insert a new block at the beginning of the filter region + // that transfers control flow to the existing single block. + if (afterBlk == nullptr) + { + assert(putInFilter); + + BasicBlock* newFilterEntryBlock = fgNewBBbefore(BBJ_ALWAYS, startBlk, true); + newFilterEntryBlock->bbJumpDest = startBlk; + fgAddRefPred(startBlk, newFilterEntryBlock); + + afterBlk = newFilterEntryBlock; + } + _FoundAfterBlk:; /* We have decided to insert the block after 'afterBlk'. */ @@ -20508,7 +20561,28 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } /* Check the bbRefs */ - noway_assert(!checkBBRefs || block->bbRefs == blockRefs); + if (checkBBRefs) + { + if (block->bbRefs != blockRefs) + { + // Check to see if this block is the beginning of a filter or a handler and adjust the ref count + // appropriately. + for (EHblkDsc *HBtab = compHndBBtab, *HBtabEnd = &compHndBBtab[compHndBBtabCount]; HBtab != HBtabEnd; + HBtab++) + { + if (HBtab->ebdHndBeg == block) + { + blockRefs++; + } + if (HBtab->HasFilter() && (HBtab->ebdFilter == block)) + { + blockRefs++; + } + } + } + + assert(block->bbRefs == blockRefs); + } /* Check that BBF_HAS_HANDLER is valid bbTryIndex */ if (block->hasTryIndex()) @@ -21809,8 +21883,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe noway_assert(opts.OptEnabled(CLFLG_INLINING)); // This is the InlineInfo struct representing a method to be inlined. 
- InlineInfo inlineInfo = {nullptr}; - + InlineInfo inlineInfo; + memset(&inlineInfo, 0, sizeof(inlineInfo)); CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd; inlineInfo.fncHandle = fncHandle; @@ -21850,7 +21924,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe CORINFO_METHOD_HANDLE fncHandle; InlineCandidateInfo* inlineCandidateInfo; InlineInfo* inlineInfo; - } param = {nullptr}; + } param; + memset(¶m, 0, sizeof(param)); param.pThis = this; param.call = call; diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index c5733b81e4..a2156d035e 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -450,7 +450,7 @@ bool GenTree::IsNodeProperlySized() const #define BASH_HASH_SIZE 211 -inline hashme(genTreeOps op1, genTreeOps op2) +inline unsigned hashme(genTreeOps op1, genTreeOps op2) { return ((op1 * 104729) ^ (op2 * 56569)) % BASH_HASH_SIZE; } @@ -15026,390 +15026,6 @@ bool Compiler::gtComplexityExceeds(GenTreePtr* tree, unsigned limit) } } -/* -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XX XX -XX BasicBlock XX -XX XX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -*/ - -#if MEASURE_BLOCK_SIZE -/* static */ -size_t BasicBlock::s_Size; -/* static */ -size_t BasicBlock::s_Count; -#endif // MEASURE_BLOCK_SIZE - -#ifdef DEBUG -// The max # of tree nodes in any BB -/* static */ -unsigned BasicBlock::s_nMaxTrees; -#endif // DEBUG - -/***************************************************************************** - * - * Allocate a basic block but don't append it to the current BB list. - */ - -BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind) -{ - BasicBlock* block; - - /* Allocate the block descriptor and zero it out */ - assert(fgSafeBasicBlockCreation); - - block = new (this, CMK_BasicBlock) BasicBlock; - -#if MEASURE_BLOCK_SIZE - BasicBlock::s_Count += 1; - BasicBlock::s_Size += sizeof(*block); -#endif - -#ifdef DEBUG - // fgLookupBB() is invalid until fgInitBBLookup() is called again. - fgBBs = (BasicBlock**)0xCDCD; -#endif - - // TODO-Throughput: The following memset is pretty expensive - do something else? 
- // Note that some fields have to be initialized to 0 (like bbFPStateX87) - memset(block, 0, sizeof(*block)); - - // scopeInfo needs to be able to differentiate between blocks which - // correspond to some instrs (and so may have some LocalVarInfo - // boundaries), or have been inserted by the JIT - block->bbCodeOffs = BAD_IL_OFFSET; - block->bbCodeOffsEnd = BAD_IL_OFFSET; - - /* Give the block a number, set the ancestor count and weight */ - - ++fgBBcount; - - if (compIsForInlining()) - { - block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax; - } - else - { - block->bbNum = ++fgBBNumMax; - } - -#ifndef LEGACY_BACKEND - if (compRationalIRForm) - { - block->bbFlags |= BBF_IS_LIR; - } -#endif // !LEGACY_BACKEND - - block->bbRefs = 1; - block->bbWeight = BB_UNITY_WEIGHT; - - block->bbStkTempsIn = NO_BASE_TMP; - block->bbStkTempsOut = NO_BASE_TMP; - - block->bbEntryState = nullptr; - - /* Record the jump kind in the block */ - - block->bbJumpKind = jumpKind; - - if (jumpKind == BBJ_THROW) - { - block->bbSetRunRarely(); - } - -#ifdef DEBUG - if (verbose) - { - printf("New Basic Block BB%02u [%p] created.\n", block->bbNum, dspPtr(block)); - } -#endif - - // We will give all the blocks var sets after the number of tracked variables - // is determined and frozen. After that, if we dynamically create a basic block, - // we will initialize its var sets. - if (fgBBVarSetsInited) - { - VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this)); - VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this)); - VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this)); - VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this)); - VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this)); - } - else - { - VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal()); - VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal()); - VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal()); - VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal()); - VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal()); - } - - block->bbMemoryUse = emptyMemoryKindSet; - block->bbMemoryDef = emptyMemoryKindSet; - block->bbMemoryLiveIn = emptyMemoryKindSet; - block->bbMemoryLiveOut = emptyMemoryKindSet; - - for (MemoryKind memoryKind : allMemoryKinds()) - { - block->bbMemorySsaPhiFunc[memoryKind] = nullptr; - block->bbMemorySsaNumIn[memoryKind] = 0; - block->bbMemorySsaNumOut[memoryKind] = 0; - } - - // Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index. 
- static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP); - - block->bbNatLoopNum = BasicBlock::NOT_IN_LOOP; - - return block; -} - -//------------------------------------------------------------------------------ -// containsStatement - return true if the block contains the given statement -//------------------------------------------------------------------------------ - -bool BasicBlock::containsStatement(GenTree* statement) -{ - assert(statement->gtOper == GT_STMT); - - GenTree* curr = bbTreeList; - do - { - if (curr == statement) - { - break; - } - curr = curr->gtNext; - } while (curr); - return curr != nullptr; -} - -GenTreeStmt* BasicBlock::FirstNonPhiDef() -{ - GenTreePtr stmt = bbTreeList; - if (stmt == nullptr) - { - return nullptr; - } - GenTreePtr tree = stmt->gtStmt.gtStmtExpr; - while ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_PHI) || - (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_PHI)) - { - stmt = stmt->gtNext; - if (stmt == nullptr) - { - return nullptr; - } - tree = stmt->gtStmt.gtStmtExpr; - } - return stmt->AsStmt(); -} - -GenTreePtr BasicBlock::FirstNonPhiDefOrCatchArgAsg() -{ - GenTreePtr stmt = FirstNonPhiDef(); - if (stmt == nullptr) - { - return nullptr; - } - GenTreePtr tree = stmt->gtStmt.gtStmtExpr; - if ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_CATCH_ARG) || - (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_CATCH_ARG)) - { - stmt = stmt->gtNext; - } - return stmt; -} - -/***************************************************************************** - * - * Mark a block as rarely run, we also don't want to have a loop in a - * rarely run block, and we set it's weight to zero. - */ - -void BasicBlock::bbSetRunRarely() -{ - setBBWeight(BB_ZERO_WEIGHT); - if (bbWeight == BB_ZERO_WEIGHT) - { - bbFlags |= BBF_RUN_RARELY; // This block is never/rarely run - } -} - -/***************************************************************************** - * - * Can a BasicBlock be inserted after this without altering the flowgraph - */ - -bool BasicBlock::bbFallsThrough() -{ - switch (bbJumpKind) - { - - case BBJ_THROW: - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - case BBJ_EHCATCHRET: - case BBJ_RETURN: - case BBJ_ALWAYS: - case BBJ_LEAVE: - case BBJ_SWITCH: - return false; - - case BBJ_NONE: - case BBJ_COND: - return true; - - case BBJ_CALLFINALLY: - return ((bbFlags & BBF_RETLESS_CALL) == 0); - - default: - assert(!"Unknown bbJumpKind in bbFallsThrough()"); - return true; - } -} - -unsigned BasicBlock::NumSucc(Compiler* comp) -{ - // As described in the spec comment of NumSucc at its declaration, whether "comp" is null determines - // whether NumSucc and GetSucc yield successors of finally blocks. - - switch (bbJumpKind) - { - - case BBJ_THROW: - case BBJ_RETURN: - return 0; - - case BBJ_EHFILTERRET: - if (comp == nullptr) - { - return 0; - } - else - { - return 1; - } - - case BBJ_EHFINALLYRET: - { - if (comp == nullptr) - { - return 0; - } - else - { - // The first block of the handler is labelled with the catch type. - BasicBlock* hndBeg = comp->fgFirstBlockOfHandler(this); - if (hndBeg->bbCatchTyp == BBCT_FINALLY) - { - return comp->fgNSuccsOfFinallyRet(this); - } - else - { - assert(hndBeg->bbCatchTyp == BBCT_FAULT); // We can only BBJ_EHFINALLYRET from FINALLY and FAULT. - // A FAULT block has no successors. 
- return 0; - } - } - } - case BBJ_CALLFINALLY: - case BBJ_ALWAYS: - case BBJ_EHCATCHRET: - case BBJ_LEAVE: - case BBJ_NONE: - return 1; - case BBJ_COND: - if (bbJumpDest == bbNext) - { - return 1; - } - else - { - return 2; - } - case BBJ_SWITCH: - if (comp == nullptr) - { - return bbJumpSwt->bbsCount; - } - else - { - Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this); - return sd.numDistinctSuccs; - } - - default: - unreached(); - } -} - -BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp) -{ - // As described in the spec comment of GetSucc at its declaration, whether "comp" is null determines - // whether NumSucc and GetSucc yield successors of finally blocks. - - assert(i < NumSucc(comp)); // Index bounds check. - // printf("bbjk=%d\n", bbJumpKind); - switch (bbJumpKind) - { - - case BBJ_THROW: - case BBJ_RETURN: - unreached(); // Should have been covered by assert above. - - case BBJ_EHFILTERRET: - { - assert(comp != nullptr); // Or else we're not looking for successors. - BasicBlock* result = comp->fgFirstBlockOfHandler(this); - noway_assert(result == bbJumpDest); - // Handler is the (sole) normal successor of the filter. - return result; - } - - case BBJ_EHFINALLYRET: - return comp->fgSuccOfFinallyRet(this, i); - - case BBJ_CALLFINALLY: - case BBJ_ALWAYS: - case BBJ_EHCATCHRET: - case BBJ_LEAVE: - return bbJumpDest; - - case BBJ_NONE: - return bbNext; - case BBJ_COND: - if (i == 0) - { - return bbNext; - } - else - { - assert(i == 1); - return bbJumpDest; - }; - case BBJ_SWITCH: - if (comp == nullptr) - { - assert(i < bbJumpSwt->bbsCount); // Range check. - return bbJumpSwt->bbsDstTab[i]; - } - else - { - // Remove duplicates. - Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this); - assert(i < sd.numDistinctSuccs); // Range check. - return sd.nonDuplicates[i]; - } - - default: - unreached(); - } -} - // ------------------------------------------------------------------------- // IsRegOptional: Returns true if this gentree node is marked by lowering to // indicate that codegen can still generate code even if it wasn't allocated @@ -17403,3 +17019,68 @@ regMaskTP ReturnTypeDesc::GetABIReturnRegs() return resultMask; } + +#ifndef LEGACY_BACKEND + +//------------------------------------------------------------------------ +// The following functions manage the gtRsvdRegs set of temporary registers +// created by LSRA during code generation. + +//------------------------------------------------------------------------ +// AvailableTempRegCount: return the number of available temporary registers in the (optional) given set +// (typically, RBM_ALLINT or RBM_ALLFLOAT). +// +// Arguments: +// mask - (optional) Check for available temporary registers only in this set. +// +// Return Value: +// Count of available temporary registers in given set. +// +unsigned GenTree::AvailableTempRegCount(regMaskTP mask /* = (regMaskTP)-1 */) const +{ + return genCountBits(gtRsvdRegs & mask); +} + +//------------------------------------------------------------------------ +// GetSingleTempReg: There is expected to be exactly one available temporary register +// in the given mask in the gtRsvdRegs set. Get that register. No future calls to get +// a temporary register are expected. Removes the register from the set, but only in +// DEBUG to avoid doing unnecessary work in non-DEBUG builds. +// +// Arguments: +// mask - (optional) Get an available temporary register only in this set. +// +// Return Value: +// Available temporary register in given mask. 
+// +regNumber GenTree::GetSingleTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +{ + regMaskTP availableSet = gtRsvdRegs & mask; + assert(genCountBits(availableSet) == 1); + regNumber tempReg = genRegNumFromMask(availableSet); + INDEBUG(gtRsvdRegs &= ~availableSet;) // Remove the register from the set, so it can't be used again. + return tempReg; +} + +//------------------------------------------------------------------------ +// ExtractTempReg: Find the lowest number temporary register from the gtRsvdRegs set +// that is also in the optional given mask (typically, RBM_ALLINT or RBM_ALLFLOAT), +// and return it. Remove this register from the temporary register set, so it won't +// be returned again. +// +// Arguments: +// mask - (optional) Extract an available temporary register only in this set. +// +// Return Value: +// Available temporary register in given mask. +// +regNumber GenTree::ExtractTempReg(regMaskTP mask /* = (regMaskTP)-1 */) +{ + regMaskTP availableSet = gtRsvdRegs & mask; + assert(genCountBits(availableSet) >= 1); + regMaskTP tempRegMask = genFindLowestBit(availableSet); + gtRsvdRegs &= ~tempRegMask; + return genRegNumFromMask(tempRegMask); +} + +#endif // !LEGACY_BACKEND
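The two helpers above, together with AvailableTempRegCount, centralize the gtRsvdRegs bookkeeping that the simdcodegenxarch.cpp hunks later in this diff convert over to. A minimal sketch of the intended call pattern, assuming a node whose TreeNodeInfoInit asked LSRA for two internal integer registers (the function and variable names below are illustrative, not part of the commit):

    // Sketch only: consume the two temporaries LSRA reserved on 'node'.
    void consumeReservedTemps(GenTree* node)
    {
        assert(node->AvailableTempRegCount() == 2);
        regNumber tmpReg1 = node->ExtractTempReg();   // takes the lowest-numbered temp and removes it from the set
        regNumber tmpReg2 = node->GetSingleTempReg(); // asserts exactly one temp remains, then takes it
        // A mask narrows either call to a register class, e.g.:
        //     regNumber fltTmp = node->GetSingleTempReg(RBM_ALLFLOAT);
        // ... emit code using tmpReg1 and tmpReg2 ...
    }

Since GetSingleTempReg removes the register from gtRsvdRegs only under INDEBUG, asking twice trips its genCountBits assert in DEBUG builds instead of silently handing out the same register.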
\ No newline at end of file diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 1d52248657..d3a03ee1b7 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -733,6 +733,13 @@ public: ValueNumPair gtVNPair; regMaskSmall gtRsvdRegs; // set of fixed trashed registers + +#ifndef LEGACY_BACKEND + unsigned AvailableTempRegCount(regMaskTP mask = (regMaskTP)-1) const; + regNumber GetSingleTempReg(regMaskTP mask = (regMaskTP)-1); + regNumber ExtractTempReg(regMaskTP mask = (regMaskTP)-1); +#endif // !LEGACY_BACKEND + #ifdef LEGACY_BACKEND regMaskSmall gtUsedRegs; // set of used (trashed) registers #endif // LEGACY_BACKEND diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 54427ba4dd..a991598258 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -145,24 +145,6 @@ void Compiler::impPushOnStack(GenTreePtr tree, typeInfo ti) } } -/******************************************************************************/ -// used in the inliner, where we can assume typesafe code. please don't use in the importer!! -inline void Compiler::impPushOnStackNoType(GenTreePtr tree) -{ - assert(verCurrentState.esStackDepth < impStkSize); - INDEBUG(verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo = typeInfo()); - verCurrentState.esStack[verCurrentState.esStackDepth++].val = tree; - - if ((tree->gtType == TYP_LONG) && (compLongUsed == false)) - { - compLongUsed = true; - } - else if (((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) && (compFloatingPointUsed == false)) - { - compFloatingPointUsed = true; - } -} - inline void Compiler::impPushNullObjRefOnStack() { impPushOnStack(gtNewIconNode(0, TYP_REF), typeInfo(TI_NULL)); @@ -322,20 +304,6 @@ StackEntry Compiler::impPopStack() return verCurrentState.esStack[--verCurrentState.esStackDepth]; } -StackEntry Compiler::impPopStack(CORINFO_CLASS_HANDLE& structType) -{ - StackEntry ret = impPopStack(); - structType = verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo.GetClassHandle(); - return (ret); -} - -GenTreePtr Compiler::impPopStack(typeInfo& ti) -{ - StackEntry ret = impPopStack(); - ti = ret.seTypeInfo; - return (ret.val); -} - /***************************************************************************** * * Peep at n'th (0-based) tree on the top of the stack. @@ -3652,6 +3620,87 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, retNode = field; break; } + case CORINFO_INTRINSIC_Span_GetItem: + case CORINFO_INTRINSIC_ReadOnlySpan_GetItem: + { + // Have index, stack pointer-to Span<T> s on the stack. Expand to: + // + // For Span<T> + // Comma + // BoundsCheck(index, s->_length) + // s->_pointer + index * sizeof(T) + // + // For ReadOnlySpan<T> + // Comma + // BoundsCheck(index, s->_length) + // *(s->_pointer + index * sizeof(T)) + // + // Signature should show one class type parameter, which + // we need to examine. + assert(sig->sigInst.classInstCount == 1); + CORINFO_CLASS_HANDLE spanElemHnd = sig->sigInst.classInst[0]; + const unsigned elemSize = info.compCompHnd->getClassSize(spanElemHnd); + assert(elemSize > 0); + + const bool isReadOnly = (intrinsicID == CORINFO_INTRINSIC_ReadOnlySpan_GetItem); + + JITDUMP("\nimpIntrinsic: Expanding %sSpan<T>.get_Item, T=%s, sizeof(T)=%u\n", isReadOnly ? 
"ReadOnly" : "", + info.compCompHnd->getClassName(spanElemHnd), elemSize); + + GenTreePtr index = impPopStack().val; + GenTreePtr ptrToSpan = impPopStack().val; + GenTreePtr indexClone = nullptr; + GenTreePtr ptrToSpanClone = nullptr; + +#if defined(DEBUG) + if (verbose) + { + printf("with ptr-to-span\n"); + gtDispTree(ptrToSpan); + printf("and index\n"); + gtDispTree(index); + } +#endif // defined(DEBUG) + + // We need to use both index and ptr-to-span twice, so clone or spill. + index = impCloneExpr(index, &indexClone, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Span.get_Item index")); + ptrToSpan = impCloneExpr(ptrToSpan, &ptrToSpanClone, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Span.get_Item ptrToSpan")); + + // Bounds check + CORINFO_FIELD_HANDLE lengthHnd = info.compCompHnd->getFieldInClass(clsHnd, 1); + const unsigned lengthOffset = info.compCompHnd->getFieldOffset(lengthHnd); + GenTreePtr length = gtNewFieldRef(TYP_INT, lengthHnd, ptrToSpan, lengthOffset, false); + GenTreePtr boundsCheck = new (this, GT_ARR_BOUNDS_CHECK) + GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, length, SCK_RNGCHK_FAIL); + + // Element access + GenTreePtr indexIntPtr = impImplicitIorI4Cast(indexClone, TYP_I_IMPL); + GenTreePtr sizeofNode = gtNewIconNode(elemSize); + GenTreePtr mulNode = gtNewOperNode(GT_MUL, TYP_I_IMPL, indexIntPtr, sizeofNode); + CORINFO_FIELD_HANDLE ptrHnd = info.compCompHnd->getFieldInClass(clsHnd, 0); + const unsigned ptrOffset = info.compCompHnd->getFieldOffset(ptrHnd); + GenTreePtr data = gtNewFieldRef(TYP_BYREF, ptrHnd, ptrToSpanClone, ptrOffset, false); + GenTreePtr result = gtNewOperNode(GT_ADD, TYP_BYREF, data, mulNode); + + // Prepare result + var_types resultType = JITtype2varType(sig->retType); + + if (isReadOnly) + { + result = gtNewOperNode(GT_IND, resultType, result); + } + else + { + assert(resultType == result->TypeGet()); + } + + retNode = gtNewOperNode(GT_COMMA, resultType, boundsCheck, result); + + break; + } + default: /* Unknown intrinsic */ break; @@ -5093,8 +5142,9 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) impSpillSpecialSideEff(); // Now get the expression to box from the stack. - CORINFO_CLASS_HANDLE operCls; - GenTreePtr exprToBox = impPopStack(operCls).val; + StackEntry se = impPopStack(); + CORINFO_CLASS_HANDLE operCls = se.seTypeInfo.GetClassHandle(); + GenTreePtr exprToBox = se.val; CorInfoHelpFunc boxHelper = info.compCompHnd->getBoxHelper(pResolvedToken->hClass); if (boxHelper == CORINFO_HELP_BOX) @@ -6166,6 +6216,12 @@ bool Compiler::impTailCallRetTypeCompatible(var_types callerRetType, return true; } + // If the class handles are the same and not null, the return types are compatible. + if ((callerRetTypeClass != nullptr) && (callerRetTypeClass == calleeRetTypeClass)) + { + return true; + } + #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) // Jit64 compat: if (callerRetType == TYP_VOID) @@ -6271,7 +6327,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, int cntPop = 0; OPCODE nextOpcode; -#ifdef _TARGET_AMD64_ +#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) do { nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode); @@ -6282,7 +6338,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, // one pop seen so far. 
#else nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode); -#endif +#endif // !FEATURE_CORECLR && _TARGET_AMD64_ if (isCallPopAndRet) { @@ -6290,15 +6346,15 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, *isCallPopAndRet = (nextOpcode == CEE_RET) && (cntPop == 1); } -#ifdef _TARGET_AMD64_ +#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_) // Jit64 Compat: // Tail call IL pattern could be either of the following // 1) call/callvirt/calli + ret // 2) call/callvirt/calli + pop + ret in a method returning void. return (nextOpcode == CEE_RET) && ((cntPop == 0) || ((cntPop == 1) && (info.compRetType == TYP_VOID))); -#else //!_TARGET_AMD64_ +#else return (nextOpcode == CEE_RET) && (cntPop == 0); -#endif +#endif // !FEATURE_CORECLR && _TARGET_AMD64_ } /***************************************************************************** @@ -10052,7 +10108,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) /* Pop the value being assigned */ { - StackEntry se = impPopStack(clsHnd); + StackEntry se = impPopStack(); + clsHnd = se.seTypeInfo.GetClassHandle(); op1 = se.val; tiRetVal = se.seTypeInfo; } @@ -11916,14 +11973,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) break; case CEE_POP: - if (tiVerificationNeeded) - { - impStackTop(0); - } - + { /* Pull the top value from the stack */ - op1 = impPopStack(clsHnd).val; + StackEntry se = impPopStack(); + clsHnd = se.seTypeInfo.GetClassHandle(); + op1 = se.val; /* Get hold of the type of the value being duplicated */ @@ -11974,10 +12029,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) } /* No side effects - just throw the <BEEP> thing away */ - break; + } + break; case CEE_DUP: - + { if (tiVerificationNeeded) { // Dup could start the begining of delegate creation sequence, remember that @@ -11988,7 +12044,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) // If the expression to dup is simple, just clone it. // Otherwise spill it to a temp, and reload the temp // twice. 
- op1 = impPopStack(tiRetVal); + StackEntry se = impPopStack(); + tiRetVal = se.seTypeInfo; + op1 = se.val; if (!opts.compDbgCode && !op1->IsIntegralConst(0) && !op1->IsFPZero() && !op1->IsLocal()) { @@ -12010,8 +12068,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(!(op1->gtFlags & GTF_GLOB_EFFECT) && !(op2->gtFlags & GTF_GLOB_EFFECT)); impPushOnStack(op1, tiRetVal); impPushOnStack(op2, tiRetVal); - - break; + } + break; case CEE_STIND_I1: lclTyp = TYP_BYTE; @@ -12928,8 +12986,10 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (opcode == CEE_LDFLD || opcode == CEE_LDFLDA) { - tiObj = &impStackTop().seTypeInfo; - obj = impPopStack(objType).val; + tiObj = &impStackTop().seTypeInfo; + StackEntry se = impPopStack(); + objType = se.seTypeInfo.GetClassHandle(); + obj = se.val; if (impIsThis(obj)) { @@ -13311,8 +13371,10 @@ void Compiler::impImportBlockCode(BasicBlock* block) typeInfo tiVal; /* Pull the value from the stack */ - op2 = impPopStack(tiVal); - clsHnd = tiVal.GetClassHandle(); + StackEntry se = impPopStack(); + op2 = se.val; + tiVal = se.seTypeInfo; + clsHnd = tiVal.GetClassHandle(); if (opcode == CEE_STFLD) { @@ -14552,7 +14614,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) op1 = gtNewOperNode(GT_IND, TYP_REF, op1); op1->gtFlags |= GTF_EXCEPT | GTF_GLOB_REF; - impPushOnStackNoType(op1); + impPushOnStack(op1, typeInfo()); opcode = CEE_STIND_REF; lclTyp = TYP_REF; goto STIND_POST_VERIFY; @@ -14995,7 +15057,7 @@ void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORIN GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass) { unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return.")); - impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE); + impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_ALL); GenTreePtr ret = gtNewLclvNode(tmpNum, op->gtType); // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. 
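The Span&lt;T&gt;.get_Item / ReadOnlySpan&lt;T&gt;.get_Item expansion earlier in this importer.cpp diff builds a COMMA(bounds check, address) tree. As a rough model in plain C++, over an assumed two-field layout mirroring the getFieldInClass(clsHnd, 0) and getFieldInClass(clsHnd, 1) queries above (illustrative code, not part of the JIT):

    #include <stdexcept>

    template <typename T>
    struct SpanLayout
    {
        T*  pointer; // field 0: what the tree reads as s->_pointer (TYP_BYREF)
        int length;  // field 1: what the tree reads as s->_length (TYP_INT)
    };

    template <typename T>
    T& SpanGetItem(SpanLayout<T>* s, int index)
    {
        // Stands in for GT_ARR_BOUNDS_CHECK with SCK_RNGCHK_FAIL; the single
        // unsigned compare also rejects negative indices.
        if (static_cast<unsigned>(index) >= static_cast<unsigned>(s->length))
            throw std::out_of_range("index");
        // GT_ADD(TYP_BYREF): s->_pointer + index * sizeof(T).
        return s->pointer[index];
    }

For ReadOnlySpan&lt;T&gt; the importer additionally wraps the address in a GT_IND, i.e. the equivalent of returning the element by value rather than a byref.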
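Most of the remaining importer.cpp hunks apply one mechanical rewrite: the impPopStack overloads that returned extra data through a CORINFO_CLASS_HANDLE& or typeInfo& out-parameter are gone, and each call site now reads what it needs from the returned StackEntry. The shape of the rewrite, using locals as the surrounding hunks declare them:

    // Before (removed overload; class handle came back via an out-parameter):
    //     GenTreePtr op1 = impPopStack(clsHnd).val;

    // After (single overload; callers pull fields off the StackEntry):
    StackEntry se = impPopStack();
    clsHnd        = se.seTypeInfo.GetClassHandle();
    tiRetVal      = se.seTypeInfo;
    op1           = se.val;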
@@ -15050,7 +15112,8 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
     if (info.compRetType != TYP_VOID)
     {
-        StackEntry se = impPopStack(retClsHnd);
+        StackEntry se = impPopStack();
+        retClsHnd     = se.seTypeInfo.GetClassHandle();
         op2 = se.val;
 
         if (!compIsForInlining())
@@ -15398,14 +15461,14 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
             // We must have imported a tailcall and jumped to RET
             if (prefixFlags & PREFIX_TAILCALL)
             {
-#ifndef _TARGET_AMD64_
+#if defined(FEATURE_CORECLR) || !defined(_TARGET_AMD64_)
                 // Jit64 compat:
                 // This cannot be asserted on Amd64 since we permit the following IL pattern:
                 //      tail.call
                 //      pop
                 //      ret
                 assert(verCurrentState.esStackDepth == 0 && impOpcodeIsCallOpcode(opcode));
-#endif
+#endif // FEATURE_CORECLR || !_TARGET_AMD64_
 
                 opcode = CEE_RET; // To prevent trying to spill if CALL_SITE_BOUNDARIES
@@ -15450,7 +15513,8 @@ inline void Compiler::impReimportMarkBlock(BasicBlock* block)
 
 void Compiler::impReimportMarkSuccessors(BasicBlock* block)
 {
-    for (unsigned i = 0; i < block->NumSucc(); i++)
+    const unsigned numSuccs = block->NumSucc();
+    for (unsigned i = 0; i < numSuccs; i++)
     {
         impReimportMarkBlock(block->GetSucc(i));
     }
@@ -15625,7 +15689,8 @@ void Compiler::impImportBlock(BasicBlock* block)
         JITDUMP("Marking BBF_INTERNAL block BB%02u as BBF_IMPORTED\n", block->bbNum);
         block->bbFlags |= BBF_IMPORTED;
 
-        for (unsigned i = 0; i < block->NumSucc(); i++)
+        const unsigned numSuccs = block->NumSucc();
+        for (unsigned i = 0; i < numSuccs; i++)
         {
             impImportBlockPending(block->GetSucc(i));
         }
@@ -16052,7 +16117,8 @@ SPILLSTACK:
         impReimportSpillClique(block);
 
         // For blocks that haven't been imported yet, we still need to mark them as pending import.
-        for (unsigned i = 0; i < block->NumSucc(); i++)
+        const unsigned numSuccs = block->NumSucc();
+        for (unsigned i = 0; i < numSuccs; i++)
         {
             BasicBlock* succ = block->GetSucc(i);
             if ((succ->bbFlags & BBF_IMPORTED) == 0)
@@ -16066,7 +16132,8 @@ SPILLSTACK:
         // otherwise just import the successors of block
 
         /* Does this block jump to any other blocks? */
-        for (unsigned i = 0; i < block->NumSucc(); i++)
+        const unsigned numSuccs = block->NumSucc();
+        for (unsigned i = 0; i < numSuccs; i++)
         {
             impImportBlockPending(block->GetSucc(i));
         }
@@ -16323,7 +16390,8 @@ void Compiler::impWalkSpillCliqueFromPred(BasicBlock* block, SpillCliqueWalker*
         BasicBlock* blk = node->m_blk;
         FreeBlockListNode(node);
 
-        for (unsigned succNum = 0; succNum < blk->NumSucc(); succNum++)
+        const unsigned numSuccs = blk->NumSucc();
+        for (unsigned succNum = 0; succNum < numSuccs; succNum++)
         {
             BasicBlock* succ = blk->GetSucc(succNum);
             // If it's not already in the clique, add it, and also add it
@@ -17107,7 +17175,8 @@ void Compiler::impCheckCanInline(GenTreePtr call,
         CORINFO_CONTEXT_HANDLE exactContextHnd;
         InlineResult* result;
         InlineCandidateInfo** ppInlineCandidateInfo;
-    } param = {nullptr};
+    } param;
+    memset(&param, 0, sizeof(param));
 
     param.pThis = this;
     param.call = call;
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index e2435cab28..5bbfdde3bc 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -244,8 +244,15 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock)
    //
    // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level.
// - assert(tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel || compiler->rpFrameType != FT_ESP_FRAME); + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef UNIX_X86_ABI + // bbTgtStkDepth is a (pure) argument count (stack alignment padding should be excluded). + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == (genStackLevel - curNestedAlignment)) || isFramePointerUsed()); +#else + assert((tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel) || isFramePointerUsed()); #endif +#endif // !FEATURE_FIXED_OUT_ARGS getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock); } diff --git a/src/jit/jit.h b/src/jit/jit.h index ee3f8c9117..5120449411 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -276,14 +276,14 @@ #define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || (!defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)) #define FEATURE_PUT_STRUCT_ARG_STK 1 #define PUT_STRUCT_ARG_STK_ONLY_ARG(x) , x #define PUT_STRUCT_ARG_STK_ONLY(x) x -#else // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))) +#else // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (!defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND))) #define PUT_STRUCT_ARG_STK_ONLY_ARG(x) #define PUT_STRUCT_ARG_STK_ONLY(x) -#endif // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))) +#endif // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (!defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND))) #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h index 624ad1a191..3657696afc 100644 --- a/src/jit/jitconfigvalues.h +++ b/src/jit/jitconfigvalues.h @@ -279,6 +279,12 @@ CONFIG_STRING(JitTimeLogCsv, W("JitTimeLogCsv")) // If set, gather JIT throughpu // mode must be used in internal retail builds. CONFIG_STRING(TailCallOpt, W("TailCallOpt")) +CONFIG_INTEGER(JitMeasureNowayAssert, W("JitMeasureNowayAssert"), 0) // Set to 1 to measure noway_assert usage. Only + // valid if MEASURE_NOWAY is defined. +CONFIG_STRING(JitMeasureNowayAssertFile, + W("JitMeasureNowayAssertFile")) // Set to file to write noway_assert usage to a file (if not + // set: stdout). Only valid if MEASURE_NOWAY is defined. + #if defined(DEBUG) || defined(INLINE_DATA) CONFIG_INTEGER(JitInlineDumpData, W("JitInlineDumpData"), 0) CONFIG_INTEGER(JitInlineDumpXml, W("JitInlineDumpXml"), 0) // 1 = full xml (all methods), 2 = minimal xml (only method diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 5bcb1c8f77..e64b5a1645 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1459,7 +1459,7 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, StructPromotionInfo->canPromote = false; unsigned structSize = info.compCompHnd->getClassSize(typeHnd); - if (structSize >= MaxOffset) + if (structSize > MaxOffset) { return; // struct is too large } diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index 47950aee63..d498a6f419 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -1167,12 +1167,10 @@ class LiveVarAnalysis } // Additionally, union in all the live-in tracked vars of successors. 
- AllSuccessorIter succsEnd = block->GetAllSuccs(m_compiler).end(); - for (AllSuccessorIter succs = block->GetAllSuccs(m_compiler).begin(); succs != succsEnd; ++succs) + for (BasicBlock* succ : block->GetAllSuccs(m_compiler)) { - BasicBlock* succ = (*succs); VarSetOps::UnionD(m_compiler, m_liveOut, succ->bbLiveIn); - m_memoryLiveOut |= (*succs)->bbMemoryLiveIn; + m_memoryLiveOut |= succ->bbMemoryLiveIn; if (succ->bbNum <= block->bbNum) { m_hasPossibleBackEdge = true; diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index e7c1c839d1..3718ddfb8a 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -1378,7 +1378,8 @@ void LinearScan::setBlockSequence() assert(!"Switch with single successor"); } - for (unsigned succIndex = 0; succIndex < block->NumSucc(compiler); succIndex++) + const unsigned numSuccs = block->NumSucc(compiler); + for (unsigned succIndex = 0; succIndex < numSuccs; succIndex++) { BasicBlock* succ = block->GetSucc(succIndex, compiler); if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr) @@ -4697,11 +4698,13 @@ void LinearScan::buildIntervals() { VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); } - AllSuccessorIter succsEnd = block->GetAllSuccs(compiler).end(); - for (AllSuccessorIter succs = block->GetAllSuccs(compiler).begin(); - succs != succsEnd && !VarSetOps::IsEmpty(compiler, expUseSet); ++succs) + for (BasicBlock* succ : block->GetAllSuccs(compiler)) { - BasicBlock* succ = (*succs); + if (VarSetOps::IsEmpty(compiler, expUseSet)) + { + break; + } + if (isBlockVisited(succ)) { continue; @@ -9676,10 +9679,12 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // What interval is this register associated with? // (associated with incoming reg) - Interval* sourceIntervals[REG_COUNT] = {nullptr}; + Interval* sourceIntervals[REG_COUNT]; + memset(&sourceIntervals, 0, sizeof(sourceIntervals)); // Intervals for vars that need to be loaded from the stack - Interval* stackToRegIntervals[REG_COUNT] = {nullptr}; + Interval* stackToRegIntervals[REG_COUNT]; + memset(&stackToRegIntervals, 0, sizeof(stackToRegIntervals)); // Get the starting insertion point for the "to" resolution GenTreePtr insertionPoint = nullptr; diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index e35e57908a..e83f50c051 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -131,7 +131,7 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) // // Size? Init Memory? # temp regs // 0 - 0 - // const and <=4 ptr words - hasPspSym ? 1 : 0 + // const and <=4 str instr - hasPspSym ? 1 : 0 // const and <PageSize No hasPspSym ? 1 : 0 // >4 ptr words Yes hasPspSym ? 2 : 1 // Non-const Yes hasPspSym ? 2 : 1 @@ -173,16 +173,12 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) } else { - // target (regCnt) + tmp + [psp] - info->internalIntCount = 1; - info->isInternalRegDelayFree = true; + info->internalIntCount = 1; } } else { - // target (regCnt) + tmp + [psp] - info->internalIntCount = 1; - info->isInternalRegDelayFree = true; + info->internalIntCount = 1; } if (hasPspSym) @@ -194,7 +190,13 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) else { // target (regCnt) + tmp + [psp] - info->internalIntCount = hasPspSym ? 2 : 1; + info->internalIntCount = hasPspSym ? 
2 : 1;
+    }
+
+    // If we need temporary registers, make sure they are
+    // different from target (regCnt).
+    if (info->internalIntCount > 0)
+    {
         info->isInternalRegDelayFree = true;
     }
 }
@@ -330,16 +332,33 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
                 var_types srcType = castOp->TypeGet();
                 emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
-                // If we cannot store the comparisons in an immediate for either
-                // comparing against the max or min value, then we will need to
-                // reserve a temporary register.
-
-                bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE);
-                bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE);
+                // If we cannot store data in an immediate for instructions,
+                // then we will need to reserve a temporary register.
-                if (!canStoreMaxValue || !canStoreMinValue)
+                if (!castInfo.signCheckOnly) // In case of only sign check, temp regs are not needed.
                 {
-                    info->internalIntCount = 1;
+                    if (castInfo.unsignedSource || castInfo.unsignedDest)
+                    {
+                        // check typeMask
+                        bool canStoreTypeMask = emitter::emitIns_valid_imm_for_alu(castInfo.typeMask);
+                        if (!canStoreTypeMask)
+                        {
+                            info->internalIntCount = 1;
+                        }
+                    }
+                    else
+                    {
+                        // For comparing against the max or min value
+                        bool canStoreMaxValue =
+                            emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE);
+                        bool canStoreMinValue =
+                            emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE);
+
+                        if (!canStoreMaxValue || !canStoreMinValue)
+                        {
+                            info->internalIntCount = 1;
+                        }
+                    }
                 }
             }
         }
@@ -425,7 +444,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
-               info->internalIntCount = 2;
+               info->internalIntCount = 1;
+               info->isInternalRegDelayFree = true;
            }
            __fallthrough;
@@ -522,14 +542,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
            break;
        case GT_ARR_INDEX:
-           info->srcCount = 2;
-           info->dstCount = 1;
-
-           // We need one internal register when generating code for GT_ARR_INDEX, however the
-           // register allocator always may just give us the same one as it gives us for the 'dst'
-           // as a workaround we will just ask for two internal registers.
-           //
-           info->internalIntCount = 2;
+           info->srcCount = 2;
+           info->dstCount = 1;
+           info->internalIntCount = 1;
+           info->isInternalRegDelayFree = true;

            // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
            // times while the result is being computed.
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 3b2d465495..6de00f4bda 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -289,7 +289,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
            if (tree->gtOverflow())
            {
                // Need a register different from target reg to check for overflow.
-               info->internalIntCount = 2;
+               info->internalIntCount = 1;
+               info->isInternalRegDelayFree = true;
            }
            __fallthrough;
@@ -602,14 +603,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
            break;
        case GT_ARR_INDEX:
-           info->srcCount = 2;
-           info->dstCount = 1;
-
-           // We need one internal register when generating code for GT_ARR_INDEX, however the
-           // register allocator always may just give us the same one as it gives us for the 'dst'
-           // as a workaround we will just ask for two internal registers.
- // - info->internalIntCount = 2; + info->srcCount = 2; + info->dstCount = 1; + info->internalIntCount = 1; + info->isInternalRegDelayFree = true; // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 92d5e0967e..f63496b686 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -60,7 +60,8 @@ GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreeP GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args) { - tree->ChangeOper(GT_CALL); + // The helper call ought to be semantically equivalent to the original node, so preserve its VN. + tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN); tree->gtFlags |= GTF_CALL; if (args) @@ -3384,10 +3385,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), TARGET_POINTER_SIZE)) / TARGET_POINTER_SIZE; + if (isHfaArg) + { + hasMultiregStructArgs = true; + } + else if (size > 1 && size <= 4) + { + hasMultiregStructArgs = true; + } } else { // The typical case + // long/double type argument(s) will be changed to GT_FIELD_LIST in lower phase size = genTypeStSz(argx->gtType); } #elif defined(_TARGET_X86_) @@ -3399,7 +3409,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #ifdef _TARGET_ARM_ else if (isHfaArg) { - size = GetHfaCount(argx); + size = GetHfaCount(argx); + hasMultiregStructArgs = true; } #endif // _TARGET_ARM_ else // struct type @@ -3759,14 +3770,25 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } -#ifndef _TARGET_X86_ - // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between - // registers and stack? +#ifdef _TARGET_64BIT_ if (size > 1) { hasMultiregStructArgs = true; } -#endif // !_TARGET_X86_ +#elif defined(_TARGET_ARM_) + // TODO-Arm: Need to handle the case + // where structs passed by value can be split between registers and stack. + if (size > 1 && size <= 4) + { + hasMultiregStructArgs = true; + } +#ifndef LEGACY_BACKEND + else if (size > 4 && passUsingIntRegs) + { + NYI_ARM("Struct can be split between registers and stack"); + } +#endif // !LEGACY_BACKEND +#endif // _TARGET_ARM_ } // The 'size' value has now must have been set. (the original value of zero is an invalid value) @@ -4058,6 +4080,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #ifdef _TARGET_ARM_ if (fltArgRegNum > MAX_FLOAT_REG_ARG) { +#ifndef LEGACY_BACKEND + NYI_ARM("Struct split between float registers and stack"); +#endif // !LEGACY_BACKEND // This indicates a partial enregistration of a struct type assert(varTypeIsStruct(argx)); unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG); @@ -4087,6 +4112,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #ifdef _TARGET_ARM_ if (intArgRegNum > MAX_REG_ARG) { +#ifndef LEGACY_BACKEND + NYI_ARM("Struct split between integer registers and stack"); +#endif // !LEGACY_BACKEND // This indicates a partial enregistration of a struct type assert((isStructArg) || argx->OperIsCopyBlkOp() || (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG))); @@ -4145,7 +4173,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // 'Lower' the MKREFANY tree and insert it. 
noway_assert(!reMorphing);
-#ifdef _TARGET_X86_
+#ifndef _TARGET_64BIT_

                // Build the mkrefany as a GT_FIELD_LIST
                GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
@@ -4156,7 +4184,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                fp->node = fieldList;
                args->gtOp.gtOp1 = fieldList;

-#else // !_TARGET_X86_
+#else // _TARGET_64BIT_

                // Get a new temp
                // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
@@ -4182,7 +4210,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                // EvalArgsToTemps will cause tmp to actually get loaded as the argument
                call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
                lvaSetVarAddrExposed(tmp);
-#endif // !_TARGET_X86_
+#endif // _TARGET_64BIT_
            }
#endif // !LEGACY_BACKEND
@@ -4221,7 +4249,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
            }
        }
    }
-#endif // defined (_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#endif // _TARGET_X86_ && !LEGACY_BACKEND

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (isStructArg && !isRegArg)
@@ -4601,14 +4629,10 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
    unsigned flagsSummary = 0;
    fgArgInfoPtr allArgInfo = call->fgArgInfo;

-   // Currently only ARM64 is using this method to morph the MultiReg struct args
-   // in the future AMD64_UNIX and for HFAs ARM32, will also use this method
-   //
+   // Currently ARM64/ARM use this method to morph the MultiReg struct args;
+   // in the future AMD64_UNIX will also use this method.
    CLANG_FORMAT_COMMENT_ANCHOR;

-#ifdef _TARGET_ARM_
-   NYI_ARM("fgMorphMultiregStructArgs");
-#endif
#ifdef _TARGET_X86_
    assert(!"Logic error: no MultiregStructArgs for X86");
#endif
@@ -4704,13 +4728,13 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
// this also forces the struct to be stack allocated into the local frame.
// For the GT_OBJ case will clone the address expression and generate two (or more)
// indirections.
-// Currently the implementation only handles ARM64 and will NYI for other architectures.
+// Currently the implementation handles ARM64/ARM and will NYI for other architectures.
// GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr) { assert(arg->TypeGet() == TYP_STRUCT); -#ifndef _TARGET_ARM64_ +#ifndef _TARGET_ARMARCH_ NYI("fgMorphMultiregStructArg requires implementation for this target"); #endif @@ -4766,21 +4790,36 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f } else { +#ifdef _TARGET_ARM64_ assert(structSize <= 2 * TARGET_POINTER_SIZE); +#elif defined(_TARGET_ARM_) + assert(structSize <= 4 * TARGET_POINTER_SIZE); +#endif + +#ifdef _TARGET_ARM64_ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); elemCount = 2; type[0] = getJitGCType(gcPtrs[0]); type[1] = getJitGCType(gcPtrs[1]); +#elif defined(_TARGET_ARM_) + BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE}; + elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE; + info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]); + for (unsigned inx = 0; inx < elemCount; inx++) + { + type[inx] = getJitGCType(gcPtrs[inx]); + } +#endif // _TARGET_ARM_ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR)) { - // We can safely widen this to 16 bytes since we are loading from + elemSize = TARGET_POINTER_SIZE; + // We can safely widen this to aligned bytes since we are loading from // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and // lives in the stack frame or will be a promoted field. // - elemSize = TARGET_POINTER_SIZE; - structSize = 2 * TARGET_POINTER_SIZE; + structSize = elemCount * TARGET_POINTER_SIZE; } else // we must have a GT_OBJ { @@ -4788,21 +4827,25 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f // We need to load the struct from an arbitrary address // and we can't read past the end of the structSize - // We adjust the second load type here + // We adjust the last load type here // - if (structSize < 2 * TARGET_POINTER_SIZE) + unsigned remainingBytes = structSize % TARGET_POINTER_SIZE; + unsigned lastElem = elemCount - 1; + if (remainingBytes != 0) { - switch (structSize - TARGET_POINTER_SIZE) + switch (remainingBytes) { case 1: - type[1] = TYP_BYTE; + type[lastElem] = TYP_BYTE; break; case 2: - type[1] = TYP_SHORT; + type[lastElem] = TYP_SHORT; break; +#ifdef _TARGET_ARM64_ case 4: - type[1] = TYP_INT; + type[lastElem] = TYP_INT; break; +#endif // _TARGET_ARM64_ default: noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg"); break; @@ -4824,10 +4867,10 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f assert(varNum < lvaCount); LclVarDsc* varDsc = &lvaTable[varNum]; - // At this point any TYP_STRUCT LclVar must be a 16-byte struct + // At this point any TYP_STRUCT LclVar must be an aligned struct // or an HFA struct, both which are passed by value. 
    //
-   assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
+   assert((varDsc->lvSize() == elemCount * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());

    varDsc->lvIsMultiRegArg = true;
@@ -4855,8 +4898,12 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
        }
        else
        {
+#ifdef _TARGET_ARM64_
            // We must have a 16-byte struct (non-HFA)
            noway_assert(elemCount == 2);
+#elif defined(_TARGET_ARM_)
+           noway_assert(elemCount <= 4);
+#endif

            for (unsigned inx = 0; inx < elemCount; inx++)
            {
@@ -4878,6 +4925,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
            }
        }

+#ifdef _TARGET_ARM64_
        // Is this LclVar a promoted struct with exactly 2 fields?
        // TODO-ARM64-CQ: Support struct promoted HFA types here
        if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
        {
@@ -4929,6 +4977,78 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
            //
            lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
        }
+#elif defined(_TARGET_ARM_)
+       // Is this LclVar a promoted struct with exactly the same size?
+       if (varDsc->lvPromoted && (varDsc->lvFieldCnt == elemCount) && !varDsc->lvIsHfa())
+       {
+           // See if we have promoted fields.
+           unsigned varNums[4];
+           bool hasBadVarNum = false;
+           for (unsigned inx = 0; inx < elemCount; inx++)
+           {
+               varNums[inx] = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE * inx);
+               if (varNums[inx] == BAD_VAR_NUM)
+               {
+                   hasBadVarNum = true;
+                   break;
+               }
+           }
+
+           // Did we find the promoted fields at the necessary offsets?
+           if (!hasBadVarNum)
+           {
+               LclVarDsc* varDscs[4];
+               var_types varType[4];
+               bool varIsFloat = false;
+
+               for (unsigned inx = 0; inx < elemCount; inx++)
+               {
+                   varDscs[inx] = &lvaTable[varNums[inx]];
+                   varType[inx] = varDscs[inx]->lvType;
+                   if (varTypeIsFloating(varType[inx]))
+                   {
+                       // TODO-LSRA - It currently doesn't support the passing of floating point
+                       // LCL_VARS in the integer registers. So for now we will use GT_LCLFLD's
+                       // to pass this struct (it won't be enregistered).
+                       //
+                       JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
+                               varNum);
+                       //
+                       // we call lvaSetVarDoNotEnregister and do the proper transformation below.
+                       //
+                       varIsFloat = true;
+                       break;
+                   }
+               }
+
+               if (!varIsFloat)
+               {
+                   unsigned offset = 0;
+                   GenTreeFieldList* listEntry = nullptr;
+                   // We can use the promoted struct fields as arguments
+                   for (unsigned inx = 0; inx < elemCount; inx++)
+                   {
+                       GenTreePtr lclVar = gtNewLclvNode(varNums[inx], varType[inx], varNums[inx]);
+                       // Create a new tree for 'arg',
+                       // replacing the existing LDOBJ(ADDR(LCLVAR))
+                       listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(lclVar, offset, varType[inx], listEntry);
+                       if (newArg == nullptr)
+                       {
+                           newArg = listEntry;
+                       }
+                       offset += TARGET_POINTER_SIZE;
+                   }
+               }
+           }
+       }
+       else
+       {
+           //
+           // We will create a list of GT_LCL_FLD nodes to pass this struct
+           //
+           lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+       }
+#endif // _TARGET_ARM_
    }

    // If we didn't set newarg to a new List Node tree
@@ -7862,7 +7982,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
        // the call.
        GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;

-#ifdef _TARGET_AMD64_
+#if !defined(FEATURE_CORECLR) && defined(_TARGET_AMD64_)
        // Legacy Jit64 Compat:
        // There could be any number of GT_NOPs between tail call and GT_RETURN.
// That is tail call pattern could be one of the following:
@@ -7929,7 +8049,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
            fgRemoveStmt(compCurBB, morphStmtToRemove);
        }
    }
-#endif // _TARGET_AMD64_
+#endif // !FEATURE_CORECLR && _TARGET_AMD64_

    // Delete GT_RETURN if any
    if (nextMorphStmt != nullptr)
@@ -11416,6 +11536,20 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
            }
        }

+       // If gtOp1 is a GT_FIELD, we need to pass down the mac if
+       // its parent is GT_ADDR, since the address of the field
+       // is part of an ongoing address computation. Otherwise
+       // op1 represents the value of the field and so any address
+       // calculations it does are in a new context.
+       if ((op1->gtOper == GT_FIELD) && (tree->gtOper != GT_ADDR))
+       {
+           subMac1 = nullptr;
+
+           // The impact of this field's value on any ongoing
+           // address computation is handled below when looking
+           // at op2.
+       }
+
        tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);

#if LOCAL_ASSERTION_PROP
@@ -11496,7 +11630,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
        // (These are used to convey parent context about how addresses being calculated
        // will be used; see the specification comment for MorphAddrContext for full details.)
        // Assume it's an Ind context to start.
-       MorphAddrContext subIndMac2(MACK_Ind);
        switch (tree->gtOper)
        {
            case GT_ADD:
@@ -11517,6 +11650,17 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
            default:
                break;
        }
+
+       // If gtOp2 is a GT_FIELD, we must be taking its value,
+       // so it should evaluate its address in a new context.
+       if (op2->gtOper == GT_FIELD)
+       {
+           // The impact of this field's value on any ongoing
+           // address computation is handled above when looking
+           // at op1.
+           mac = nullptr;
+       }
+
        tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);

        /* Propagate the side effect flags from op2 */
@@ -13676,20 +13820,20 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)

    /* Make sure we have the operator range right */

-   noway_assert(GT_SUB == GT_ADD + 1);
-   noway_assert(GT_MUL == GT_ADD + 2);
-   noway_assert(GT_DIV == GT_ADD + 3);
-   noway_assert(GT_MOD == GT_ADD + 4);
-   noway_assert(GT_UDIV == GT_ADD + 5);
-   noway_assert(GT_UMOD == GT_ADD + 6);
+   static_assert(GT_SUB == GT_ADD + 1, "bad oper value");
+   static_assert(GT_MUL == GT_ADD + 2, "bad oper value");
+   static_assert(GT_DIV == GT_ADD + 3, "bad oper value");
+   static_assert(GT_MOD == GT_ADD + 4, "bad oper value");
+   static_assert(GT_UDIV == GT_ADD + 5, "bad oper value");
+   static_assert(GT_UMOD == GT_ADD + 6, "bad oper value");

-   noway_assert(GT_OR == GT_ADD + 7);
-   noway_assert(GT_XOR == GT_ADD + 8);
-   noway_assert(GT_AND == GT_ADD + 9);
+   static_assert(GT_OR == GT_ADD + 7, "bad oper value");
+   static_assert(GT_XOR == GT_ADD + 8, "bad oper value");
+   static_assert(GT_AND == GT_ADD + 9, "bad oper value");

-   noway_assert(GT_LSH == GT_ADD + 10);
-   noway_assert(GT_RSH == GT_ADD + 11);
-   noway_assert(GT_RSZ == GT_ADD + 12);
+   static_assert(GT_LSH == GT_ADD + 10, "bad oper value");
+   static_assert(GT_RSH == GT_ADD + 11, "bad oper value");
+   static_assert(GT_RSZ == GT_ADD + 12, "bad oper value");

    /* Check for a suitable operator on the RHS */
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index c18ebc55d0..710dac540c 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -7638,6 +7638,15 @@ bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lh
    {
        return false;
    }
+
+   // For Span<T> we may see that gtArrLen is a local var or local field.
+ // We won't try and extract those. + const genTreeOps arrayOp = arrBndsChk->gtArrLen->gtOper; + + if ((arrayOp == GT_LCL_VAR) || (arrayOp == GT_LCL_FLD)) + { + return false; + } if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR) { return false; diff --git a/src/jit/protononjit/CMakeLists.txt b/src/jit/protononjit/CMakeLists.txt index e209e4cd36..6adf4b1b1b 100644 --- a/src/jit/protononjit/CMakeLists.txt +++ b/src/jit/protononjit/CMakeLists.txt @@ -13,10 +13,12 @@ if (CLR_CMAKE_PLATFORM_ARCH_I386) remove_definitions(-D_TARGET_X86_=1) add_definitions(-D_TARGET_ARM_) set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM_SOURCES}) + set(JIT_ARCH_LINK_LIBRARIES gcinfo_arm) elseif(CLR_CMAKE_PLATFORM_ARCH_AMD64) remove_definitions(-D_TARGET_AMD64_=1) add_definitions(-D_TARGET_ARM64_) set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM64_SOURCES}) + set(JIT_ARCH_LINK_LIBRARIES gcinfo_arm64) else() clr_unknown_arch() endif() @@ -49,7 +51,7 @@ set_property(TARGET protononjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORT set(RYUJIT_LINK_LIBRARIES utilcodestaticnohost - gcinfo + ${JIT_ARCH_LINK_LIBRARIES} ) if(CLR_CMAKE_PLATFORM_UNIX) diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index 468d302d17..940ba5f9e8 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -694,9 +694,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT)); - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber tmpReg = simdNode->GetSingleTempReg(); regNumber op1hiReg = genConsumeReg(op1hi); ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); @@ -863,9 +861,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // Note that we cannot use targetReg before consumed all source operands. Therefore, // Need an internal register to stitch together all the values into a single vector // in an XMM reg. - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber vectorReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber vectorReg = simdNode->GetSingleTempReg(); // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size. // For example in case of Vector4f we don't need to zero when using SSE2. @@ -992,14 +988,9 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { // We need a temporary register that is NOT the same as the target, // and we MAY need another. - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 2); - - regMaskTP tmpRegsMask = simdNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); + regNumber tmpReg = simdNode->ExtractTempReg(); + regNumber tmpReg2 = simdNode->GetSingleTempReg(); + // The register allocator guarantees the following conditions: // - the only registers that may be the same among op1Reg, op2Reg, tmpReg // and tmpReg2 are op1Reg and op2Reg. @@ -1269,7 +1260,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) else { // We need one additional SIMD register to store the result of the SIMD compare. 
- regNumber tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLFLOAT); + regNumber tmpReg1 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); // tmpReg1 = (op1Reg == op2Reg) // Call this value of tmpReg1 as 'compResult' for further reference below. @@ -1305,7 +1296,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { // If we are not materializing result into a register, // we would have reserved an int type internal register. - intReg = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLINT); + intReg = simdNode->GetSingleTempReg(RBM_ALLINT); } else { @@ -1313,7 +1304,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) intReg = targetReg; // Must have not reserved any int type internal registers. - assert(genCountBits(simdNode->gtRsvdRegs & RBM_ALLINT) == 0); + assert(simdNode->AvailableTempRegCount(RBM_ALLINT) == 0); } inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType)); @@ -1430,16 +1421,12 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32)) { - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - - tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); - assert(tmpReg1 != REG_NA); + tmpReg1 = simdNode->GetSingleTempReg(); assert(tmpReg1 != targetReg); } else { - assert(simdNode->gtRsvdRegs == RBM_NONE); + assert(simdNode->AvailableTempRegCount() == 0); } } else @@ -1449,17 +1436,12 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) if (iset == InstructionSet_SSE3_4) { - // Must have reserved 1 scratch register. - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); + tmpReg1 = simdNode->GetSingleTempReg(); } else { - // Must have reserved 2 scratch registers. - assert(genCountBits(simdNode->gtRsvdRegs) == 2); - regMaskTP tmpRegMask = genFindLowestBit(simdNode->gtRsvdRegs); - tmpReg1 = genRegNumFromMask(tmpRegMask); - tmpReg2 = genRegNumFromMask(simdNode->gtRsvdRegs & ~tmpRegMask); + tmpReg1 = simdNode->ExtractTempReg(); + tmpReg2 = simdNode->GetSingleTempReg(); } } @@ -1803,10 +1785,9 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } regNumber tmpReg = REG_NA; - if (simdNode->gtRsvdRegs != RBM_NONE) + if (simdNode->AvailableTempRegCount() != 0) { - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + tmpReg = simdNode->GetSingleTempReg(); } else { @@ -2011,9 +1992,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2) { // We need one additional int register as scratch - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs); + regNumber tmpReg = simdNode->GetSingleTempReg(); assert(genIsValidIntReg(tmpReg)); // Move the value from xmm reg to an int reg @@ -2103,9 +2082,7 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) #endif // Need an addtional Xmm register to extract upper 4 bytes from data. 
- assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genConsumeOperands(treeNode->AsOp()); @@ -2141,10 +2118,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to read upper 4 bytes, which is different from targetReg - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(tmpReg != targetReg); // Load upper 4 bytes in tmpReg @@ -2188,9 +2162,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to extract upper 4 bytes from data. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); // store lower 8 bytes getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs); @@ -2227,12 +2199,8 @@ void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode) offs = treeNode->gtLclFld.gtLclOffs; } - // Need an additional Xmm register that is different from - // targetReg to read upper 4 bytes. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + // Need an additional Xmm register that is different from targetReg to read upper 4 bytes. + regNumber tmpReg = treeNode->GetSingleTempReg(); assert(tmpReg != targetReg); // Read upper 4 bytes to tmpReg @@ -2298,9 +2266,7 @@ void CodeGen::genPutArgStkSIMD12(GenTree* treeNode) regNumber operandReg = genConsumeReg(op1); // Need an addtional Xmm register to extract upper 4 bytes from data. - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 1); - regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + regNumber tmpReg = treeNode->GetSingleTempReg(); genStoreSIMD12ToStack(operandReg, tmpReg); } diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp index 3d74234b26..5cbe12af3f 100644 --- a/src/jit/ssabuilder.cpp +++ b/src/jit/ssabuilder.cpp @@ -1391,11 +1391,8 @@ void SsaBuilder::BlockRenameVariables(BasicBlock* block, SsaRenameState* pRename */ void SsaBuilder::AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pRenameState) { - BasicBlock::AllSuccs allSuccs = block->GetAllSuccs(m_pCompiler); - AllSuccessorIter allSuccsEnd = allSuccs.end(); - for (AllSuccessorIter allSuccsIter = allSuccs.begin(); allSuccsIter != allSuccsEnd; ++allSuccsIter) + for (BasicBlock* succ : block->GetAllSuccs(m_pCompiler)) { - BasicBlock* succ = (*allSuccsIter); // Walk the statements for phi nodes. 
        for (GenTreePtr stmt = succ->bbTreeList; stmt != nullptr && stmt->IsPhiDefnStmt(); stmt = stmt->gtNext)
        {
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp index 03bc204070..5b40122f1e 100644 --- a/src/jit/valuenum.cpp +++ b/src/jit/valuenum.cpp @@ -4371,10 +4371,8 @@ struct ValueNumberState
        SetVisitBit(blk->bbNum, BVB_complete);

-       AllSuccessorIter succsEnd = blk->GetAllSuccs(m_comp).end();
-       for (AllSuccessorIter succs = blk->GetAllSuccs(m_comp).begin(); succs != succsEnd; ++succs)
+       for (BasicBlock* succ : blk->GetAllSuccs(m_comp))
        {
-           BasicBlock* succ = (*succs);
#ifdef DEBUG_VN_VISIT
            JITDUMP(" Succ(BB%02u).\n", succ->bbNum);
#endif // DEBUG_VN_VISIT
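One cleanup recurs across the liveness.cpp, lsra.cpp, ssabuilder.cpp, and valuenum.cpp hunks: hand-rolled AllSuccessorIter loops are replaced by range-based for loops over GetAllSuccs. Schematically, where comp is the Compiler* each site already has and visit() stands for the per-successor work:

    // Old form, as removed above: explicit iterator pair, dereferenced by hand.
    AllSuccessorIter succsEnd = block->GetAllSuccs(comp).end();
    for (AllSuccessorIter succs = block->GetAllSuccs(comp).begin(); succs != succsEnd; ++succs)
    {
        BasicBlock* succ = (*succs);
        visit(succ);
    }

    // New form, as added above: the same traversal with less iterator plumbing.
    for (BasicBlock* succ : block->GetAllSuccs(comp))
    {
        visit(succ);
    }

The NumSucc/GetSucc call sites that could not switch to range-for instead hoist the successor count into a local (const unsigned numSuccs = block->NumSucc();) so it is not recomputed on every loop iteration.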