Diffstat (limited to 'src/jit/lower.cpp')
-rw-r--r--   src/jit/lower.cpp   724
1 file changed, 449 insertions(+), 275 deletions(-)
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index a6e50b304c..0316a34a21 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -167,8 +167,13 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- LowerBlockStore(node->AsBlk());
- break;
+ {
+ // TODO-Cleanup: Consider moving this code to LowerBlockStore, which is currently
+ // called from TreeNodeInfoInitBlockStore, and calling that method here.
+ GenTreeBlk* blkNode = node->AsBlk();
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+ }
+ break;
#ifdef FEATURE_SIMD
case GT_SIMD:
@@ -236,20 +241,14 @@ GenTree* Lowering::LowerNode(GenTree* node)
unsigned varNum = node->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &comp->lvaTable[varNum];
-#if defined(_TARGET_64BIT_)
- assert(varDsc->lvSize() == 16);
- node->gtType = TYP_SIMD16;
-#else // !_TARGET_64BIT_
- if (varDsc->lvSize() == 16)
+ if (comp->lvaMapSimd12ToSimd16(varDsc))
{
+ JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
+ DISPNODE(node);
+ JITDUMP("============");
+
node->gtType = TYP_SIMD16;
}
- else
- {
- // The following assert is guaranteed by lvSize().
- assert(varDsc->lvIsParam);
- }
-#endif // !_TARGET_64BIT_
}
#endif // FEATURE_SIMD
__fallthrough;
@@ -549,7 +548,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
// If the number of possible destinations is small enough, we proceed to expand the switch
// into a series of conditional branches, otherwise we follow the jump table based switch
// transformation.
- else if (jumpCnt < minSwitchTabJumpCnt)
+ else if ((jumpCnt < minSwitchTabJumpCnt) || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50))
{
// Lower the switch into a series of compare and branch IR trees.
//
@@ -639,7 +638,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
- currentBBRange->InsertAtEnd(std::move(condRange));
+ currentBBRange->InsertAtEnd(std::move(caseRange));
}
}
@@ -757,16 +756,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
GenTreePtr putArg = nullptr;
bool updateArgTable = true;
-#if !defined(_TARGET_64BIT_)
- if (varTypeIsLong(type))
- {
- // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
- // Therefore, we don't update the arg table entry.
- updateArgTable = false;
- type = TYP_INT;
- }
-#endif // !defined(_TARGET_64BIT_)
-
bool isOnStack = true;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(type))
@@ -954,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif
+#if defined(UNIX_X86_ABI)
+ assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
+ putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
@@ -1084,25 +1078,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
NYI("Lowering of long register argument");
}
- // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
- // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
+ // Although the hi argument needs to be pushed first, that will be handled by the general case,
+ // in which the fields will be reversed.
noway_assert(arg->OperGet() == GT_LONG);
- GenTreePtr argLo = arg->gtGetOp1();
- GenTreePtr argHi = arg->gtGetOp2();
-
- GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
- GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
-
- arg->gtOp.gtOp1 = putArgLo;
- arg->gtOp.gtOp2 = putArgHi;
-
- BlockRange().InsertBefore(arg, putArgHi, putArgLo);
-
- // The execution order now looks like this:
- // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
-
- assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
- arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ assert(info->numSlots == 2);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+ GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
+ // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
+ (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+
+ // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
+ BlockRange().InsertBefore(arg, fieldList, putArg);
+ BlockRange().Remove(arg);
+ *ppArg = putArg;
}
else
#endif // !defined(_TARGET_64BIT_)
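The hunk above replaces the two per-half PUTARG_STK nodes with a single PUTARG_STK over a GT_FIELD_LIST whose TYP_INT fields sit at offsets 0 and 4. A minimal standalone sketch of the layout assumption behind those offsets (plain C++, illustrative only, not JIT code) — on x86 (little-endian) the long argument's low half occupies the first 4-byte stack slot and the high half the second, which is also why info->numSlots is asserted to be 2:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main()
    {
        uint64_t arg = 0x1122334455667788ULL;
        uint8_t  slots[8];
        std::memcpy(slots, &arg, sizeof(arg)); // stack image of the TYP_LONG argument

        uint32_t lo;
        uint32_t hi;
        std::memcpy(&lo, slots + 0, sizeof(lo)); // field at offset 0, TYP_INT
        std::memcpy(&hi, slots + 4, sizeof(hi)); // field at offset 4, TYP_INT

        assert(lo == 0x55667788u); // low half comes first in memory
        assert(hi == 0x11223344u); // high half is the second stack slot
        return 0;
    }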
@@ -1872,6 +1863,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
bool isClosed;
LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
assert(isClosed);
+ BlockRange().Remove(std::move(secondArgRange));
argEntry->node->gtOp.gtOp1 = callTarget;
@@ -1935,251 +1927,439 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
}
//------------------------------------------------------------------------
-// Lowering::LowerCompare: lowers a compare node.
-//
-// For 64-bit targets, this doesn't do much of anything: all comparisons
-// that we support can be handled in code generation on such targets.
-//
-// For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
-// node must be lowered such that the liveness of the operands to the
-// comparison is properly visible to the rest of the backend. As such,
-// a 64-bit comparison is lowered from something like this:
-//
-// ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// /--* t6 int
-// N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+// Lowering::LowerCompare: Lowers a compare node.
//
-// N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
-//
-// /--* t7 long
-// +--* t9 long
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-// To something like this:
-//
-// ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
-// [000099] ------------ t99 = const int 0
-//
-// [000101] ------------ t101 = const int 0
-//
-// /--* t99 int
-// +--* t101 int
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-//
-// ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
-// [000105] -------N-U-- jcc void cond=<
-//
-//
-// ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// N003 ( 3, 10) [000009] ------------ t9 = const int 3
-//
-// /--* t6 int
-// +--* t9 int
-// [000106] N------N-U-- t106 = * < int
-//
-// /--* t106 int
-// [000107] ------------ * jmpTrue void
-//
-// Which will eventually generate code similar to the following:
-//
-// 33DB xor ebx, ebx
-// 85DB test ebx, ebx
-// 7707 ja SHORT G_M50523_IG04
-// 72E7 jb SHORT G_M50523_IG03
-// 83F803 cmp eax, 3
-// 72E2 jb SHORT G_M50523_IG03
+// Arguments:
+// cmp - the compare node
//
+// Notes:
+// - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
+// - Ensures that we don't have a mix of int/long operands (XARCH specific).
+//    - Narrows operands to enable memory operand containment (XARCH specific).
+//    - Transforms cmp(and(x, y), 0) into test(x, y) (XARCH specific but could
+// be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
+
void Lowering::LowerCompare(GenTree* cmp)
{
#ifndef _TARGET_64BIT_
- if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
- {
- return;
- }
-
LIR::Use cmpUse;
- if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+ if ((cmp->gtGetOp1()->TypeGet() == TYP_LONG) && BlockRange().TryGetUse(cmp, &cmpUse) &&
+ cmpUse.User()->OperIs(GT_JTRUE))
{
- return;
- }
+ // For 32-bit targets any comparison that feeds a `GT_JTRUE` node must be lowered such that
+ // the liveness of the operands to the comparison is properly visible to the rest of the
+ // backend. As such, a 64-bit comparison is lowered from something like this:
+ //
+ // ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // /--* t6 int
+ // N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
+ //
+ // /--* t7 long
+ // +--* t9 long
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ // To something like this:
+ //
+ // ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
+ // [000099] ------------ t99 = const int 0
+ //
+ // [000101] ------------ t101 = const int 0
+ //
+ // /--* t99 int
+ // +--* t101 int
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ //
+ // ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
+ // [000105] -------N-U-- jcc void cond=<
+ //
+ //
+ // ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = const int 3
+ //
+ // /--* t6 int
+ // +--* t9 int
+ // [000106] N------N-U-- t106 = * < int
+ //
+ // /--* t106 int
+ // [000107] ------------ * jmpTrue void
+ //
+ // Which will eventually generate code similar to the following:
+ //
+ // 33DB xor ebx, ebx
+ // 85DB test ebx, ebx
+ // 7707 ja SHORT G_M50523_IG04
+ // 72E7 jb SHORT G_M50523_IG03
+ // 83F803 cmp eax, 3
+ // 72E2 jb SHORT G_M50523_IG03
+ //
- GenTree* src1 = cmp->gtGetOp1();
- GenTree* src2 = cmp->gtGetOp2();
- unsigned weight = m_block->getBBWeight(comp);
+ GenTree* src1 = cmp->gtGetOp1();
+ GenTree* src2 = cmp->gtGetOp2();
+ unsigned weight = m_block->getBBWeight(comp);
- LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
- LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+ LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+ LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
- if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc1.ReplaceWithLclVar(comp, weight);
- }
+            // TODO-CQ-32bit: We should move more code to the new basic block; currently we're only moving
+            // constants and lclvars. In particular, it would be nice to move GT_AND nodes as that would
+            // enable the and-cmp to test transform that happens later in this function. That's not
+            // ideal either; the and-cmp to test transform should really run before this code, but:
+            // - it would need to run before decomposition, otherwise it won't recognize the 0 constant
+ // because after decomposition it is packed in a GT_LONG
+ // - this code would also need to handle GT_TEST_EQ/GT_TEST_NE
- if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc2.ReplaceWithLclVar(comp, weight);
- }
+ if (!loSrc1.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc1.ReplaceWithLclVar(comp, weight);
+ }
+
+ if (!loSrc2.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc2.ReplaceWithLclVar(comp, weight);
+ }
- BasicBlock* jumpDest = m_block->bbJumpDest;
- BasicBlock* nextDest = m_block->bbNext;
- BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+ BasicBlock* jumpDest = m_block->bbJumpDest;
+ BasicBlock* nextDest = m_block->bbNext;
+ BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
- cmp->gtType = TYP_INT;
- cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
- cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
+ cmp->gtType = TYP_INT;
+ cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
+ cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
- if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
- {
- // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
- // bits and one for the lower 32 bits. As such, we update the flow graph like so:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: cond(hi)
- // / \
- // false true
- // | |
- // | BB3: cond(lo)
- // | / \
- // | false true
- // \ / |
- // BB1 BB2
- //
+ if (cmp->OperIs(GT_EQ, GT_NE))
+ {
+ // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
+ // bits and one for the lower 32 bits. As such, we update the flow graph like so:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: cond(hi)
+ // / \
+ // false true
+ // | |
+ // | BB3: cond(lo)
+ // | / \
+ // | false true
+ // \ / |
+ // BB1 BB2
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
- m_block->bbJumpKind = BBJ_COND;
+ m_block->bbJumpKind = BBJ_COND;
+
+ if (cmp->OperIs(GT_EQ))
+ {
+ cmp->gtOper = GT_NE;
+ m_block->bbJumpDest = nextDest;
+ nextDest->bbFlags |= BBF_JMP_TARGET;
+ comp->fgAddRefPred(nextDest, m_block);
+ }
+ else
+ {
+ m_block->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, m_block);
+ }
- if (cmp->OperGet() == GT_EQ)
+ assert(newBlock->bbJumpKind == BBJ_COND);
+ assert(newBlock->bbJumpDest == jumpDest);
+ }
+ else
{
- cmp->gtOper = GT_NE;
+ // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and
+ // one comparison for the lower 32 bits. We update the flowgraph as such:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: (!cond(hi) && !eq(hi))
+ // / \
+ // true false
+ // | |
+ // | BB3: (cond(hi) && !eq(hi))
+ // | / \
+ // | false true
+ // | | |
+ // | BB4: cond(lo) |
+ // | / \ |
+ // | false true |
+ // \ / \ /
+ // BB1 BB2
+ //
+ //
+ // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
+ // differ based on the original condition, and all consist of a single node. The switch statement below
+ // performs the necessary mapping.
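+            // For example, for a signed GT_LT: BB0 compares the high halves with GT_GT and its taken
+            // branch goes to the false successor; BB3 holds a GT_JCC with condition GT_LT whose taken
+            // branch goes to the true successor; BB4 then compares the low halves with an unsigned GT_LT.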
+ //
+
+ genTreeOps hiCmpOper;
+ genTreeOps loCmpOper;
+
+ switch (cmp->OperGet())
+ {
+ case GT_LT:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LT;
+ break;
+ case GT_LE:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LE;
+ break;
+ case GT_GT:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GT;
+ break;
+ case GT_GE:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GE;
+ break;
+ default:
+ unreached();
+ }
+
+ BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+
+ GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
+ hiJcc->gtFlags = cmp->gtFlags;
+ LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+
+ m_block->bbJumpKind = BBJ_COND;
m_block->bbJumpDest = nextDest;
nextDest->bbFlags |= BBF_JMP_TARGET;
comp->fgAddRefPred(nextDest, m_block);
+
+ newBlock->bbJumpKind = BBJ_COND;
+ newBlock->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, newBlock);
+
+ assert(newBlock2->bbJumpKind == BBJ_COND);
+ assert(newBlock2->bbJumpDest == jumpDest);
}
- else
+
+ BlockRange().Remove(src1);
+ BlockRange().Remove(src2);
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ if (cmp->gtGetOp1()->TypeGet() != cmp->gtGetOp2()->TypeGet())
+ {
+ bool op1Is64Bit = (genTypeSize(cmp->gtGetOp1()->TypeGet()) == 8);
+ bool op2Is64Bit = (genTypeSize(cmp->gtGetOp2()->TypeGet()) == 8);
+
+ if (op1Is64Bit != op2Is64Bit)
{
- m_block->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, m_block);
- }
+ //
+ // Normally this should not happen. IL allows comparing int32 to native int but the importer
+ // automatically inserts a cast from int32 to long on 64 bit architectures. However, the JIT
+ // accidentally generates int/long comparisons internally:
+ // - loop cloning compares int (and even small int) index limits against long constants
+            //   - switch lowering compares a 64 bit switch value against an int32 constant
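+            //
+            // For example, a loop cloning limit check GT_LT(int index, long constant) simply retypes
+            // the constant to TYP_INT when its value fits, while a switch's GT_GT(long switch value,
+            // int constant) retypes the constant to TYP_LONG; only non-constant operands need a cast.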
+ //
+ // TODO-Cleanup: The above mentioned issues should be fixed and then the code below may be
+ // replaced with an assert or at least simplified. The special casing of constants in code
+ // below is only necessary to prevent worse code generation for switches and loop cloning.
+ //
- assert(newBlock->bbJumpKind == BBJ_COND);
- assert(newBlock->bbJumpDest == jumpDest);
+ GenTree* longOp = op1Is64Bit ? cmp->gtOp.gtOp1 : cmp->gtOp.gtOp2;
+ GenTree** smallerOpUse = op2Is64Bit ? &cmp->gtOp.gtOp1 : &cmp->gtOp.gtOp2;
+ var_types smallerType = (*smallerOpUse)->TypeGet();
+
+ assert(genTypeSize(smallerType) < 8);
+
+ if (longOp->IsCnsIntOrI() && genTypeCanRepresentValue(smallerType, longOp->AsIntCon()->IconValue()))
+ {
+ longOp->gtType = smallerType;
+ }
+ else if ((*smallerOpUse)->IsCnsIntOrI())
+ {
+ (*smallerOpUse)->gtType = TYP_LONG;
+ }
+ else
+ {
+ GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG);
+ *smallerOpUse = cast;
+ BlockRange().InsertAfter(cast->gtGetOp1(), cast);
+ }
+ }
}
- else
+#endif // _TARGET_AMD64_
+
+ if (cmp->gtGetOp2()->IsIntegralConst())
{
- // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
- // comparison for the lower 32 bits. We update the flowgraph as such:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: (!cond(hi) && !eq(hi))
- // / \
- // true false
- // | |
- // | BB3: (cond(hi) && !eq(hi))
- // | / \
- // | false true
- // | | |
- // | BB4: cond(lo) |
- // | / \ |
- // | false true |
- // \ / \ /
- // BB1 BB2
- //
- //
- // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
- // differ based on the original condition, and all consist of a single node. The switch statement below
- // performs the necessary mapping.
- //
+ GenTree* op1 = cmp->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+ GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
+ ssize_t op2Value = op2->IconValue();
- genTreeOps hiCmpOper;
- genTreeOps loCmpOper;
+ if (op1->isMemoryOp() && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
+ {
+ //
+ // If op1's type is small then try to narrow op2 so it has the same type as op1.
+ // Small types are usually used by memory loads and if both compare operands have
+ // the same type then the memory load can be contained. In certain situations
+                // (e.g. "cmp ubyte, 200") we also get a smaller instruction encoding.
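+                // For example, the constant 200 does not fit in a sign-extended imm8 for a 32-bit
+                // compare, but once both operands are narrowed to TYP_UBYTE the compare can be emitted
+                // as a byte-sized memory compare against an 8-bit immediate.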
+ //
- switch (cmp->OperGet())
+ op2->gtType = op1Type;
+ }
+ else if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
{
- case GT_LT:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LT;
- break;
- case GT_LE:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LE;
- break;
- case GT_GT:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GT;
- break;
- case GT_GE:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GE;
- break;
- default:
- unreached();
+ GenTreeCast* cast = op1->AsCast();
+ var_types castToType = cast->CastToType();
+ GenTree* castOp = cast->gtGetOp1();
+
+ if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value))
+ {
+ //
+ // Since we're going to remove the cast we need to be able to narrow the cast operand
+                    // to the cast type. This can be done safely only for certain opers (e.g. AND, OR, XOR).
+                    // Some opers just can't be narrowed (e.g. DIV, MUL) while others could be narrowed but
+                    // doing so would produce incorrect results (e.g. RSZ, RSH, which pull in bits from above
+                    // the narrowed width).
+ //
+ // The below list of handled opers is conservative but enough to handle the most common
+                    // situations. In particular this includes CALL; sometimes the JIT unnecessarily widens
+                    // the result of bool-returning calls.
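+                    // For example, EQ(cast<ubyte>(call), 1) becomes EQ(call, 1) with both operands
+                    // retyped to TYP_UBYTE, so the call's bool result can be compared directly.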
+ //
+
+ if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || castOp->isMemoryOp())
+ {
+ assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
+
+ castOp->gtType = castToType;
+ cmp->gtOp.gtOp1 = castOp;
+ op2->gtType = castToType;
+
+ BlockRange().Remove(cast);
+ }
+ }
}
+ else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
+ {
+ //
+ // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible.
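+                // For example, NE(AND(x, 0x10), 0) becomes TEST_NE(x, 0x10), which emits a single
+                // "test" instruction instead of an "and" followed by a compare against zero.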
+ //
- BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+ GenTree* andOp1 = op1->gtGetOp1();
+ GenTree* andOp2 = op1->gtGetOp2();
- GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
- hiJcc->gtFlags = cmp->gtFlags;
- LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+ if (op2Value != 0)
+ {
+ //
+ // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask)
+ // into ((x AND mask) NE|EQ 0) when mask is a single bit.
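+                    // This holds because with a single-bit mask (x AND mask) can only be 0 or mask,
+                    // so "EQ mask" and "NE 0" test exactly the same condition.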
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value))
+ {
+ op2Value = 0;
+ op2->SetIconValue(0);
+ cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet()));
+ }
+ }
- m_block->bbJumpKind = BBJ_COND;
- m_block->bbJumpDest = nextDest;
- nextDest->bbFlags |= BBF_JMP_TARGET;
- comp->fgAddRefPred(nextDest, m_block);
+ if (op2Value == 0)
+ {
+ BlockRange().Remove(op1);
+ BlockRange().Remove(op2);
- newBlock->bbJumpKind = BBJ_COND;
- newBlock->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, newBlock);
+ cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
+ cmp->gtOp.gtOp1 = andOp1;
+ cmp->gtOp.gtOp2 = andOp2;
- assert(newBlock2->bbJumpKind == BBJ_COND);
- assert(newBlock2->bbJumpDest == jumpDest);
+ if (andOp1->isMemoryOp() && andOp2->IsIntegralConst())
+ {
+ //
+ // For "test" we only care about the bits that are set in the second operand (mask).
+ // If the mask fits in a small type then we can narrow both operands to generate a "test"
+                        // instruction with a smaller encoding ("test" does not have an r/m32, imm8 form) and avoid
+ // a widening load in some cases.
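+                        // For example, with an int-sized memory operand and a mask of 0x20, the "test"
+                        // can be emitted as a byte-sized memory test against an 8-bit immediate (only the
+                        // masked bits matter) instead of a 32-bit load and a 4-byte immediate.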
+ //
+ // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches
+ // the behavior of a previous implementation and avoids adding more cases where we generate
+ // 16 bit instructions that require a length changing prefix (0x66). These suffer from
+ // significant decoder stalls on Intel CPUs.
+ //
+                        // We could also do this for 64 bit masks that fit into 32 bits, but it doesn't help.
+ // In such cases morph narrows down the existing GT_AND by inserting a cast between it and
+ // the memory operand so we'd need to add more code to recognize and eliminate that cast.
+ //
+
+ size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue());
+
+ if (FitsIn<UINT8>(mask))
+ {
+ andOp1->gtType = TYP_UBYTE;
+ andOp2->gtType = TYP_UBYTE;
+ }
+ else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2)
+ {
+ andOp1->gtType = TYP_CHAR;
+ andOp2->gtType = TYP_CHAR;
+ }
+ }
+ }
+ }
}
- BlockRange().Remove(src1);
- BlockRange().Remove(src2);
-#endif
+ if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
+ {
+ if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
+ {
+ //
+ // If both operands have the same type then codegen will use the common operand type to
+ // determine the instruction type. For small types this would result in performing a
+                // signed comparison of two small unsigned values without zero-extending them to TYP_INT,
+                // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
+                // has to generate a small comparison; it can still correctly generate a TYP_INT comparison.
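+                // For example, comparing two TYP_UBYTE values 200 and 100 as signed bytes would treat
+                // 200 as -56 and wrongly conclude that it is the smaller value.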
+ //
+
+ cmp->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_XARCH_
}
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
@@ -3498,18 +3678,19 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
// make sure there are not any side effects between def of leaves and use
if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
{
- JITDUMP(" No addressing mode\n");
+ JITDUMP("No addressing mode:\n ");
+ DISPNODE(addr);
return addr;
}
GenTreePtr arrLength = nullptr;
JITDUMP("Addressing mode:\n");
- JITDUMP(" Base\n");
+ JITDUMP(" Base\n ");
DISPNODE(base);
if (index != nullptr)
{
- JITDUMP(" + Index * %u + %u\n", scale, offset);
+ JITDUMP(" + Index * %u + %u\n ", scale, offset);
DISPNODE(index);
}
else
@@ -4023,12 +4204,6 @@ void Lowering::LowerStoreInd(GenTree* node)
node->AsStoreInd()->SetRMWStatusDefault();
}
-void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
-}
-
//------------------------------------------------------------------------
// LowerArrElem: Lower a GT_ARR_ELEM node
//
@@ -4303,13 +4478,12 @@ void Lowering::DoPhase()
m_block = block;
for (GenTree* node : BlockRange().NonPhiNodes())
{
-/* We increment the number position of each tree node by 2 to
-* simplify the logic when there's the case of a tree that implicitly
-* does a dual-definition of temps (the long case). In this case
-* is easier to already have an idle spot to handle a dual-def instead
-* of making some messy adjustments if we only increment the
-* number position by one.
-*/
+            // We increment the number position of each tree node by 2 to simplify the logic for trees that
+            // implicitly do a dual definition of temps (the long case). In that case it is easier to
+ // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
+ // increment the number position by one.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
node->gtSeqNum = currentLoc;
#endif
@@ -4633,13 +4807,8 @@ bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
}
}
-#ifdef _TARGET_64BIT_
/**
* Get common information required to handle a cast instruction
- *
- * Right now only supports 64 bit targets. In order to support 32 bit targets the
- * switch statement needs work.
- *
*/
void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
{
@@ -4675,7 +4844,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
bool signCheckOnly = false;
// Do we need to compare the value, or just check masks
-
switch (dstType)
{
default:
@@ -4709,9 +4877,13 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF80000000LL;
- typeMin = INT_MIN;
- typeMax = INT_MAX;
+#else
+ typeMask = 0x80000000;
+#endif
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
}
break;
@@ -4722,7 +4894,11 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF00000000LL;
+#else
+ typeMask = 0x00000000;
+#endif
}
break;
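On 32-bit targets the masks above encode the same checks with only 32 bits of source value to inspect: 0x80000000 for a checked cast of an unsigned source to int (the value overflows exactly when its top bit is set) and 0x00000000 for a checked cast to uint (nothing left to check). A small standalone illustration of that check (plain C++ with a hypothetical FitsInInt32 helper, not the JIT's getCastDescription):

    #include <cassert>
    #include <cstdint>

    // A checked cast of an unsigned 32-bit value to signed int overflows exactly when
    // any bit selected by the 0x80000000 mask is set.
    static bool FitsInInt32(uint32_t value)
    {
        const uint32_t typeMask = 0x80000000u;
        return (value & typeMask) == 0;
    }

    int main()
    {
        assert(FitsInInt32(0x7FFFFFFFu));  // INT_MAX is representable
        assert(!FitsInInt32(0x80000000u)); // INT_MAX + 1 must raise an overflow
        return 0;
    }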
@@ -4746,8 +4922,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
}
-#endif // _TARGET_64BIT_
-
#ifdef DEBUG
void Lowering::DumpNodeInfoMap()
{