summaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
authorMike Danes <onemihaid@hotmail.com>2017-09-17 12:49:10 +0300
committerMike Danes <onemihaid@hotmail.com>2017-10-02 08:30:07 +0300
commit0b637a639e8b8972b0a17322cecb50bf0d99b7c6 (patch)
treed8911bef1bd51c97053e825f324d376efb2c82fd /src/jit
parent9b49593f5ca1b3168a4ff947cbfc3d52a97671cb (diff)
downloadcoreclr-0b637a639e8b8972b0a17322cecb50bf0d99b7c6.tar.gz
coreclr-0b637a639e8b8972b0a17322cecb50bf0d99b7c6.tar.bz2
coreclr-0b637a639e8b8972b0a17322cecb50bf0d99b7c6.zip
Simplify SIMD EQ/NE optimization
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/codegenxarch.cpp21
-rw-r--r--src/jit/lower.cpp50
-rw-r--r--src/jit/lowerxarch.cpp67
-rw-r--r--src/jit/lsra.cpp2
-rw-r--r--src/jit/lsraxarch.cpp51
-rw-r--r--src/jit/simdcodegenxarch.cpp45
6 files changed, 85 insertions, 151 deletions
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 58c6c10222..43327afc55 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -6282,27 +6282,6 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
return;
}
-#ifdef FEATURE_SIMD
- // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
- // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
- // would set or clear Zero flag.
- if ((targetReg == REG_NA) && tree->OperIs(GT_EQ, GT_NE))
- {
- // Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
- if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
- {
- // Must be comparing against true or false.
- assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1));
- assert(op2->isContainedIntOrIImmed());
-
- // In this case SIMD (in)Equality will set or clear
- // Zero flag, based on which GT_JTRUE would generate
- // the right conditional jump.
- return;
- }
- }
-#endif // FEATURE_SIMD
-
genConsumeOperands(tree);
assert(!op1->isContainedIntOrIImmed());
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index d82676e540..0b9f9b22f7 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -5606,56 +5606,6 @@ void Lowering::ContainCheckJTrue(GenTreeOp* node)
// The compare does not need to be generated into a register.
GenTree* cmp = node->gtGetOp1();
cmp->gtLsraInfo.isNoRegCompare = true;
-
-#ifdef FEATURE_SIMD
- assert(node->OperIs(GT_JTRUE));
-
- // Say we have the following IR
- // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
- // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
- // GT_JTRUE(integerCompareResult)
- //
- // In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality
- // intrinsic will set or clear the Zero flag.
- genTreeOps cmpOper = cmp->OperGet();
- if (cmpOper == GT_EQ || cmpOper == GT_NE)
- {
- GenTree* cmpOp1 = cmp->gtGetOp1();
- GenTree* cmpOp2 = cmp->gtGetOp2();
-
- if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
- {
- // We always generate code for a SIMD equality comparison, though it produces no value.
- // Neither the GT_JTRUE nor the immediate need to be evaluated.
- MakeSrcContained(cmp, cmpOp2);
- cmpOp1->gtLsraInfo.isNoRegCompare = true;
- // We have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- }
- }
-#endif // FEATURE_SIMD
}
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index bb45279887..834cd34fda 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -794,6 +794,73 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
// the addr of SIMD vector with the given index.
simdNode->gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
}
+ else if (simdNode->IsSIMDEqualityOrInequality())
+ {
+ LIR::Use simdUse;
+
+ if (BlockRange().TryGetUse(simdNode, &simdUse))
+ {
+ //
+ // Try to transform JTRUE(EQ|NE(SIMD<OpEquality|OpInEquality>(x, y), 0|1)) into
+ // JCC(SIMD<OpEquality|OpInEquality>(x, y)). SIMD<OpEquality|OpInEquality>(x, y)
+ // is expected to set the Zero flag appropriately.
+ // All the involved nodes must form a continuous range, there's no other way to
+ // guarantee that condition flags aren't changed between the SIMD node and the JCC
+ // node.
+ //
+
+ bool transformed = false;
+ GenTree* simdUser = simdUse.User();
+
+ if (simdUser->OperIs(GT_EQ, GT_NE) && simdUser->gtGetOp2()->IsCnsIntOrI() &&
+ (simdNode->gtNext == simdUser->gtGetOp2()) && (simdUser->gtGetOp2()->gtNext == simdUser))
+ {
+ ssize_t relopOp2Value = simdUser->gtGetOp2()->AsIntCon()->IconValue();
+
+ if ((relopOp2Value == 0) || (relopOp2Value == 1))
+ {
+ GenTree* jtrue = simdUser->gtNext;
+
+ if ((jtrue != nullptr) && jtrue->OperIs(GT_JTRUE) && (jtrue->gtGetOp1() == simdUser))
+ {
+ if ((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) != simdUser->OperIs(GT_EQ))
+ {
+ relopOp2Value ^= 1;
+ }
+
+ jtrue->ChangeOper(GT_JCC);
+ GenTreeCC* jcc = jtrue->AsCC();
+ jcc->gtFlags |= GTF_USE_FLAGS;
+ jcc->gtCondition = (relopOp2Value == 0) ? GT_NE : GT_EQ;
+
+ BlockRange().Remove(simdUser->gtGetOp2());
+ BlockRange().Remove(simdUser);
+ transformed = true;
+ }
+ }
+ }
+
+ if (!transformed)
+ {
+ //
+ // The code generated for SIMD SIMD<OpEquality|OpInEquality>(x, y) nodes sets
+ // the Zero flag like integer compares do so we can simply use SETCC<EQ|NE>
+ // to produce the desired result. This avoids the need for subsequent phases
+ // to have to handle 2 cases (set flags/set destination register).
+ //
+
+ genTreeOps condition = (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? GT_EQ : GT_NE;
+ GenTreeCC* setcc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, simdNode->TypeGet());
+ setcc->gtFlags |= GTF_USE_FLAGS;
+ BlockRange().InsertAfter(simdNode, setcc);
+ simdUse.ReplaceWith(comp, setcc);
+ }
+ }
+
+ simdNode->gtFlags |= GTF_SET_FLAGS;
+ simdNode->SetUnusedValue();
+ simdNode->gtLsraInfo.isNoRegCompare = true;
+ }
#endif
ContainCheckSIMD(simdNode);
}
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index db6e699134..1ace2f0da6 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -4842,7 +4842,7 @@ void LinearScan::buildIntervals()
TreeNodeInfoInit(node);
// If the node produces an unused value, mark it as a local def-use
- if (node->IsValue() && node->IsUnusedValue())
+ if (node->IsValue() && node->IsUnusedValue() && !node->gtLsraInfo.isNoRegCompare)
{
node->gtLsraInfo.isLocalDefUse = true;
node->gtLsraInfo.dstCount = 0;
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 8ac8e235b5..4fff111506 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -254,20 +254,6 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
GenTree* cmp = tree->gtGetOp1();
assert(cmp->gtLsraInfo.dstCount == 0);
-
-#ifdef FEATURE_SIMD
- GenTree* cmpOp1 = cmp->gtGetOp1();
- GenTree* cmpOp2 = cmp->gtGetOp2();
- if (cmpOp1->IsSIMDEqualityOrInequality() && cmpOp2->isContained())
- {
- genTreeOps cmpOper = cmp->OperGet();
-
- // We always generate code for a SIMD equality comparison, but the compare itself produces no value.
- // Neither the SIMD node nor the immediate need to be evaluated into a register.
- assert(cmpOp1->gtLsraInfo.dstCount == 0);
- assert(cmpOp2->gtLsraInfo.dstCount == 0);
- }
-#endif // FEATURE_SIMD
}
break;
@@ -2136,41 +2122,26 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
-
- // On SSE4/AVX, we can generate optimal code for (in)equality
- // against zero using ptest. We can safely do this optimization
- // for integral vectors but not for floating-point for the reason
- // that we have +0.0 and -0.0 and +0.0 == -0.0
if (simdTree->gtGetOp2()->isContained())
{
+ // If the second operand is contained then ContainCheckSIMD has determined
+ // that PTEST can be used. We only need a single source register and no
+ // internal registers.
info->srcCount = 1;
}
else
{
- info->srcCount = 2;
- // Need one SIMD register as scratch.
- // See genSIMDIntrinsicRelOp() for details on code sequence generated and
- // the need for one scratch register.
- //
- // Note these intrinsics produce a BOOL result, hence internal float
- // registers reserved are guaranteed to be different from target
- // integer register without explicitly specifying.
+ // Can't use PTEST so we need 2 source registers, 1 internal SIMD register
+ // (to hold the result of PCMPEQD or other similar SIMD compare instruction)
+ // and one internal INT register (to hold the result of PMOVMSKB).
+ info->srcCount = 2;
info->internalFloatCount = 1;
info->setInternalCandidates(this, allSIMDRegs());
+ info->internalIntCount = 1;
+ info->addInternalCandidates(this, allRegs(TYP_INT));
}
- if (info->isNoRegCompare)
- {
- info->dstCount = 0;
- // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
- // A target reg is not needed on AVX when comparing against Vector Zero.
- // In all other cases we need to reserve an int type internal register if we
- // don't have a target register on the compare.
- if (!compiler->canUseAVX() || !simdTree->gtGetOp2()->IsIntegralConstVector(0))
- {
- info->internalIntCount = 1;
- info->addInternalCandidates(this, allRegs(TYP_INT));
- }
- }
+ // These SIMD nodes only set the condition flags.
+ info->dstCount = 0;
break;
case SIMDIntrinsicDotProduct:
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index b7c9b29088..429cff0bb3 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -2049,6 +2049,9 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
{
+ // We're only setting condition flags, if a 0/1 value is desired then Lowering should have inserted a SETCC.
+ assert(targetReg == REG_NA);
+
var_types simdType = op1->TypeGet();
// TODO-1stClassStructs: Temporary to minimize asmDiffs
if (simdType == TYP_DOUBLE)
@@ -2064,9 +2067,9 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
}
// On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest.
- if ((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
+ if (op2->isContained())
{
- assert(op2->isContained());
+ assert((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0));
inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType));
}
else
@@ -2103,22 +2106,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
}
- regNumber intReg;
- if (targetReg == REG_NA)
- {
- // If we are not materializing result into a register,
- // we would have reserved an int type internal register.
- intReg = simdNode->GetSingleTempReg(RBM_ALLINT);
- }
- else
- {
- // We can use targetReg for setting flags.
- intReg = targetReg;
-
- // Must have not reserved any int type internal registers.
- assert(simdNode->AvailableTempRegCount(RBM_ALLINT) == 0);
- }
-
+ regNumber intReg = simdNode->GetSingleTempReg(RBM_ALLINT);
inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType));
// There's no pmovmskw/pmovmskd/pmovmskq but they're not needed anyway. Vector compare
// instructions produce "all ones"/"all zeroes" components and pmovmskb extracts a
@@ -2147,27 +2135,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
}
getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask);
}
-
- if (targetReg != REG_NA)
- {
- // If we need to materialize result into a register, targetReg needs to
- // be set to 1 on true and zero on false.
- // Equality:
- // cmp targetReg, 0xFFFFFFFF or 0xFFFF
- // sete targetReg
- // movzx targetReg, targetReg
- //
- // InEquality:
- // cmp targetReg, 0xFFFFFFFF or 0xFFFF
- // setne targetReg
- // movzx targetReg, targetReg
- //
- assert(simdNode->TypeGet() == TYP_INT);
- inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg,
- TYP_INT, EA_1BYTE);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- }
}
break;