author    | Mike Danes <onemihaid@hotmail.com> | 2017-09-18 00:26:58 +0300
committer | Mike Danes <onemihaid@hotmail.com> | 2017-10-02 08:30:14 +0300
commit    | f1f8cd25e0b79bfdf4e3b7a595fa22bbf7bdae26 (patch)
tree      | da05e71a0fb6b44833b9b7751b56eb2dc5df137e
parent    | 0b637a639e8b8972b0a17322cecb50bf0d99b7c6 (diff)
Reimplement compare flags reuse using SETCC/JCC
-rw-r--r-- | src/jit/codegenarm64.cpp |  34
-rw-r--r-- | src/jit/codegenxarch.cpp |  39
-rw-r--r-- | src/jit/compiler.cpp     |   2
-rw-r--r-- | src/jit/gentree.h        |   4
-rw-r--r-- | src/jit/lower.cpp        | 105
-rw-r--r-- | src/jit/lower.h          |   2
-rw-r--r-- | src/jit/lowerarmarch.cpp |  24
-rw-r--r-- | src/jit/lowerxarch.cpp   |  38

8 files changed, 84 insertions(+), 164 deletions(-)
```diff
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 8d21ae3b56..cf2d858d3d 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -3298,40 +3298,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
     assert(!op1->isUsedFromMemory());
     assert(!op2->isUsedFromMemory());
 
-    // Case of op1 == 0 or op1 != 0:
-    // Optimize generation of 'test' instruction if op1 sets flags.
-    //
-    // This behavior is designed to match the unexpected behavior
-    // of XARCH genCompareInt();
-    //
-    // TODO-Cleanup Review GTF_USE_FLAGS usage
-    // https://github.com/dotnet/coreclr/issues/14093
-    if ((tree->gtFlags & GTF_USE_FLAGS) != 0)
-    {
-        // op1 must set flags
-        assert(op1->gtSetFlags());
-
-        // Must be compare against zero.
-        assert(!tree->OperIs(GT_TEST_EQ, GT_TEST_NE));
-        assert(op2->IsIntegralConst(0));
-        assert(op2->isContained());
-
-        // Just consume the operands
-        genConsumeOperands(tree);
-
-        // No need to generate compare instruction since
-        // op1 sets flags
-
-        // Are we evaluating this into a register?
-        if (targetReg != REG_NA)
-        {
-            genSetRegToCond(targetReg, tree);
-            genProduceReg(tree);
-        }
-
-        return;
-    }
-
     genConsumeOperands(tree);
 
     emitAttr cmpSize = EA_ATTR(genTypeSize(op1Type));
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 43327afc55..28db51162d 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -6243,45 +6243,6 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
     var_types op2Type   = op2->TypeGet();
     regNumber targetReg = tree->gtRegNum;
 
-    // Case of op1 == 0 or op1 != 0:
-    // Optimize generation of 'test' instruction if op1 sets flags.
-    //
-    // TODO-Cleanup Review GTF_USE_FLAGS usage
-    // https://github.com/dotnet/coreclr/issues/14093
-    //
-    // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before
-    // op1, it will not modify the flags set by codegen of op1.
-    // Similarly op1 could also be reg-optional at its use and
-    // it was spilled after producing its result in a register.
-    // Spill code too will not modify the flags set by op1.
-    GenTree* realOp1 = op1->gtSkipReloadOrCopy();
-    if ((tree->gtFlags & GTF_USE_FLAGS) != 0)
-    {
-        // op1 must set ZF and SF flags
-        assert(realOp1->gtSetZSFlags());
-        assert(realOp1->gtSetFlags());
-
-        // Must be (in)equality against zero.
-        assert(tree->OperIs(GT_EQ, GT_NE));
-        assert(op2->IsIntegralConst(0));
-        assert(op2->isContained());
-
-        // Just consume the operands
-        genConsumeOperands(tree);
-
-        // No need to generate test instruction since
-        // op1 sets flags
-
-        // Are we evaluating this into a register?
-        if (targetReg != REG_NA)
-        {
-            genSetRegToCond(targetReg, tree);
-            genProduceReg(tree);
-        }
-
-        return;
-    }
-
     genConsumeOperands(tree);
 
     assert(!op1->isContainedIntOrIImmed());
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 552a6dff94..6118e45489 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -9781,11 +9781,11 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
         {
             chars += printf("[SPILLED_OP2]");
         }
-#endif
         if (tree->gtFlags & GTF_ZSF_SET)
         {
             chars += printf("[ZSF_SET]");
         }
+#endif
 #if FEATURE_SET_FLAGS
         if (tree->gtFlags & GTF_SET_FLAGS)
         {
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 82e5d70b94..e8803e0e1d 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -856,11 +856,11 @@ public:
 #ifdef LEGACY_BACKEND
 #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
 #define GTF_SPILLED_OP2 0x00000200  // op2 has been spilled
+#define GTF_ZSF_SET 0x00000400      // the zero(ZF) and sign(SF) flags set to the operand
 #else  // !LEGACY_BACKEND
 #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
 #endif // !LEGACY_BACKEND
 
-#define GTF_ZSF_SET 0x00000400   // the zero(ZF) and sign(SF) flags set to the operand
 #define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags. Use gtSetFlags() to check this flag.
 #define GTF_USE_FLAGS 0x00001000 // Indicates that this node uses the flags bits.
@@ -2126,12 +2126,14 @@ public:
     bool gtSetFlags() const;
     bool gtRequestSetFlags();
 
+#ifdef LEGACY_BACKEND
     // Returns true if the codegen of this tree node
     // sets ZF and SF flags.
     bool gtSetZSFlags() const
     {
         return (gtFlags & GTF_ZSF_SET) != 0;
     }
+#endif
 
 #ifdef DEBUG
     bool gtIsValid64RsltMul();
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 0b9f9b22f7..f6592f67a4 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -142,16 +142,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
             ContainCheckBinary(node->AsOp());
             break;
 
-#ifdef _TARGET_XARCH_
-        case GT_NEG:
-            // Codegen of this tree node sets ZF and SF flags.
-            if (!varTypeIsFloating(node))
-            {
-                node->gtFlags |= GTF_ZSF_SET;
-            }
-            break;
-#endif // _TARGET_XARCH_
-
         case GT_MUL:
         case GT_MULHI:
 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
@@ -189,8 +179,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_TEST_NE:
         case GT_CMP:
         case GT_JCMP:
-            LowerCompare(node);
-            break;
+            return LowerCompare(node);
 
         case GT_JTRUE:
             ContainCheckJTrue(node->AsOp());
@@ -2148,6 +2137,9 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
 // Arguments:
 //    cmp - the compare node
 //
+// Return Value:
+//    The next node to lower.
+//
 // Notes:
 //    - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
 //    - Decomposes long comparisons that produce a value (X86 specific).
@@ -2156,8 +2148,11 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
 //    - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could
 //      be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
 //    - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific)
+//    - Transform RELOP(OP, 0) into SETCC(OP) or JCC(OP) if OP can set the
+//      condition flags appropriately (XARCH/ARM64 specific but could be extended
+//      to ARM32 as well if ARM32 codegen supports GTF_SET_FLAGS).
 
-void Lowering::LowerCompare(GenTree* cmp)
+GenTree* Lowering::LowerCompare(GenTree* cmp)
 {
 #ifndef _TARGET_64BIT_
     if (cmp->gtGetOp1()->TypeGet() == TYP_LONG)
@@ -2363,7 +2358,7 @@ void Lowering::LowerCompare(GenTree* cmp)
             cmp->AsCC()->gtCondition = condition;
         }
 
-        return;
+        return cmp->gtNext;
     }
 #endif
 
@@ -2567,11 +2562,10 @@ void Lowering::LowerCompare(GenTree* cmp)
             }
         }
     }
-#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
 
-#ifdef _TARGET_XARCH_
     if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE))
     {
+#ifdef _TARGET_XARCH_
         //
        // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT
        // results in smaller and faster code. It also doesn't have special register
@@ -2614,10 +2608,76 @@ void Lowering::LowerCompare(GenTree* cmp)
 
             cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED;
 
-            return;
+            return cmp->gtNext;
         }
+#endif // _TARGET_XARCH_
     }
+#ifdef _TARGET_XARCH_
+    else if (cmp->OperIs(GT_EQ, GT_NE))
+#else // _TARGET_ARM64_
+    else
+#endif
+    {
+        GenTree* op1 = cmp->gtGetOp1();
+        GenTree* op2 = cmp->gtGetOp2();
+
+        // TODO-CQ: right now the below peep is inexpensive and gets the benefit in most
+        // cases because in majority of cases op1, op2 and cmp would be in that order in
+        // execution. In general we should be able to check that all the nodes that come
+        // after op1 do not modify the flags so that it is safe to avoid generating a
+        // test instruction.
+
+        if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
+#ifdef _TARGET_XARCH_
+            op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
+#else // _TARGET_ARM64_
+            op1->OperIs(GT_AND, GT_ADD, GT_SUB))
+#endif
+        {
+            op1->gtFlags |= GTF_SET_FLAGS;
+            op1->SetUnusedValue();
+
+            BlockRange().Remove(op2);
+
+            GenTree*   next = cmp->gtNext;
+            GenTree*   cc;
+            genTreeOps ccOp;
+            LIR::Use   cmpUse;
+
+            // Fast check for the common case - relop used by a JTRUE that immediately follows it.
+            if ((next != nullptr) && next->OperIs(GT_JTRUE) && (next->gtGetOp1() == cmp))
+            {
+                cc   = next;
+                ccOp = GT_JCC;
+                next = nullptr;
+                BlockRange().Remove(cmp);
+            }
+            else if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE))
+            {
+                cc   = cmpUse.User();
+                ccOp = GT_JCC;
+                next = nullptr;
+                BlockRange().Remove(cmp);
+            }
+            else // The relop is not used by a JTRUE or it is not used at all.
+            {
+                // Transform the relop node it into a SETCC. If it's not used we could remove
+                // it completely but that means doing more work to handle a rare case.
+                cc   = cmp;
+                ccOp = GT_SETCC;
+            }
+
+            genTreeOps condition = cmp->OperGet();
+            cc->ChangeOper(ccOp);
+            cc->AsCC()->gtCondition = condition;
+            cc->gtFlags |= GTF_USE_FLAGS | (cmp->gtFlags & GTF_UNSIGNED);
+
+            return next;
+        }
+    }
+#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
 
+#ifdef _TARGET_XARCH_
     if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
     {
         if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
@@ -2635,6 +2695,7 @@ void Lowering::LowerCompare(GenTree* cmp)
     }
 #endif // _TARGET_XARCH_
 
     ContainCheckCompare(cmp->AsOp());
+    return cmp->gtNext;
 }
@@ -5377,16 +5438,6 @@ void Lowering::ContainCheckNode(GenTree* node)
             ContainCheckBinary(node->AsOp());
             break;
 
-#ifdef _TARGET_XARCH_
-        case GT_NEG:
-            // Codegen of this tree node sets ZF and SF flags.
-            if (!varTypeIsFloating(node))
-            {
-                node->gtFlags |= GTF_ZSF_SET;
-            }
-            break;
-#endif // _TARGET_XARCH_
-
 #if defined(_TARGET_X86_)
         case GT_MUL_LONG:
 #endif
diff --git a/src/jit/lower.h b/src/jit/lower.h
index ce6bb94831..2f8219eb9d 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -137,7 +137,7 @@ private:
     // Call Lowering
     // ------------------------------
     void LowerCall(GenTree* call);
-    void LowerCompare(GenTree* tree);
+    GenTree* LowerCompare(GenTree* tree);
     void LowerJmpMethod(GenTree* jmp);
     void LowerRet(GenTree* ret);
     GenTree* LowerDelegateInvoke(GenTreeCall* call);
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index 8986f2f23e..22573407b4 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -700,29 +700,7 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
     GenTreePtr op1 = cmp->gtOp.gtOp1;
     GenTreePtr op2 = cmp->gtOp.gtOp2;
 
-    // If op1 codegen can set flags op2 is an immediate 0
-    // we don't need to generate cmp instruction,
-    // provided we don't have another GenTree node between op1
-    // and cmp that could potentially modify flags.
-    //
-    // TODO-CQ: right now the below peep is inexpensive and
-    // gets the benefit in most of cases because in majority
-    // of cases op1, op2 and cmp would be in that order in
-    // execution. In general we should be able to check that all
-    // the nodes that come after op1 in execution order do not
-    // modify the flags so that it is safe to avoid generating a
-    // test instruction. Such a check requires that on each
-    // GenTree node we need to set the info whether its codegen
-    // will modify flags.
-    if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
-        !cmp->OperIs(GT_TEST_EQ, GT_TEST_NE) && op1->OperIs(GT_ADD, GT_AND, GT_SUB))
-    {
-        assert(!op1->gtSetFlags());
-        op1->gtFlags |= GTF_SET_FLAGS;
-        cmp->gtFlags |= GTF_USE_FLAGS;
-    }
-
-    if (!varTypeIsFloating(cmp) && op2->IsCnsIntOrI() && ((cmp->gtFlags & GTF_USE_FLAGS) == 0))
+    if (!varTypeIsFloating(cmp) && op2->IsCnsIntOrI())
     {
         LIR::Use cmpUse;
         bool     useJCMP = false;
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 834cd34fda..415c10619b 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -1870,41 +1870,6 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
     {
         MakeSrcContained(cmp, op1);
     }
-    // If op1 codegen sets ZF and SF flags and ==/!= against
-    // zero, we don't need to generate test instruction,
-    // provided we don't have another GenTree node between op1
-    // and cmp that could potentially modify flags.
-    //
-    // TODO-CQ: right now the below peep is inexpensive and
-    // gets the benefit in most of cases because in majority
-    // of cases op1, op2 and cmp would be in that order in
-    // execution. In general we should be able to check that all
-    // the nodes that come after op1 in execution order do not
-    // modify the flags so that it is safe to avoid generating a
-    // test instruction. Such a check requires that on each
-    // GenTree node we need to set the info whether its codegen
-    // will modify flags.
-    //
-    // TODO-CQ: We can optimize compare against zero in the
-    // following cases by generating the branch as indicated
-    // against each case.
-    //  1) unsigned compare
-    //        < 0  - always FALSE
-    //       <= 0  - ZF=1 and jne
-    //        > 0  - ZF=0 and je
-    //       >= 0  - always TRUE
-    //
-    //  2) signed compare
-    //        < 0  - SF=1 and js
-    //       >= 0  - SF=0 and jns
-    else if (cmp->OperIs(GT_EQ, GT_NE) && op1->gtSetZSFlags() && op2->IsIntegralConst(0) &&
-             (op1->gtNext == op2) && (op2->gtNext == cmp))
-    {
-        // Require codegen of op1 to set the flags.
-        assert(!op1->gtSetFlags());
-        op1->gtFlags |= GTF_SET_FLAGS;
-        cmp->gtFlags |= GTF_USE_FLAGS;
-    }
     else
     {
         SetRegOptional(op1);
@@ -2077,9 +2042,6 @@ void Lowering::ContainCheckBinary(GenTreeOp* node)
         return;
     }
 
-    // Codegen of these tree nodes sets ZF and SF flags.
-    node->gtFlags |= GTF_ZSF_SET;
-
     // We're not marking a constant hanging on the left of an add
    // as containable so we assign it to a register having CQ impact.
    // TODO-XArch-CQ: Detect this case and support both generating a single instruction
```
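The shape of the transformation is easier to see outside the JIT's own data structures. Below is a minimal standalone sketch of the new `LowerCompare` peephole for the common `JTRUE(NE(x, 0))` shape; `Node`, `lowerCompare`, and the boolean flag fields are illustrative stand-ins for coreclr's GenTree/LIR types, not the real API, and only the "fast check" path is modeled (the real code also finds non-adjacent JTRUE users via a LIR use lookup and falls back to SETCC):

```cpp
// Toy model of the SETCC/JCC peephole: EQ/NE(op1, 0) is removed when op1 is
// an ALU node that immediately precedes it; op1 is marked to set the flags
// and the consumer becomes a flags-based JCC (branch) or SETCC (value).
#include <cstdio>

enum Oper { OP_ADD, OP_AND, OP_SUB, OP_CNS0, OP_NE, OP_EQ, OP_JTRUE, OP_JCC, OP_SETCC };

struct Node {
    Oper        oper;
    const char* name;
    bool        setFlags = false;   // stand-in for GTF_SET_FLAGS
    bool        useFlags = false;   // stand-in for GTF_USE_FLAGS
    Node*       next     = nullptr; // linear (execution) order, like gtNext
    Node*       op1      = nullptr;
    Node*       op2      = nullptr;
};

// Returns the next node to lower, mirroring the new GenTree* LowerCompare.
Node* lowerCompare(Node* cmp) {
    Node* op1 = cmp->op1;
    Node* op2 = cmp->op2;

    // Flags are only known to be valid if nothing executes between the ALU
    // node, the zero constant and the compare (op1 -> op2 -> cmp).
    bool aluSetsFlags = (op1->oper == OP_ADD) || (op1->oper == OP_AND) || (op1->oper == OP_SUB);
    if (!aluSetsFlags || (op2->oper != OP_CNS0) || (op1->next != op2) || (op2->next != cmp))
        return cmp->next;

    op1->setFlags = true; // ask codegen of op1 to also set ZF/SF
    op1->next     = cmp;  // unlink the zero constant from the linear order

    Node* user = cmp->next;
    if ((user != nullptr) && (user->oper == OP_JTRUE) && (user->op1 == cmp)) {
        // Branch consumer: JTRUE(NE(x, 0)) becomes JCC and the compare vanishes.
        user->oper     = OP_JCC;
        user->name     = "JCC";
        user->useFlags = true;
        op1->next      = user; // unlink the compare as well
        return user;
    }
    // Value consumer (or unused): the compare itself becomes SETCC.
    cmp->oper     = OP_SETCC;
    cmp->name     = "SETCC";
    cmp->useFlags = true;
    return cmp->next;
}

int main() {
    // Model: t = a + b; if (t != 0) ...  =>  ADD, CNS0, NE, JTRUE in linear order.
    Node add{OP_ADD, "ADD"}, zero{OP_CNS0, "CNS0"}, ne{OP_NE, "NE"}, jtrue{OP_JTRUE, "JTRUE"};
    add.next = &zero; zero.next = &ne; ne.next = &jtrue;
    ne.op1 = &add; ne.op2 = &zero; jtrue.op1 = &ne;

    lowerCompare(&ne);
    for (Node* n = &add; n != nullptr; n = n->next)
        printf("%s%s%s\n", n->name, n->setFlags ? " [SET_FLAGS]" : "", n->useFlags ? " [USE_FLAGS]" : "");
    // Prints: "ADD [SET_FLAGS]" then "JCC [USE_FLAGS]" - no compare node remains.
}
```

The payoff is the same one the deleted GTF_ZSF_SET machinery chased: on x86/x64, `add`, `sub`, `and` and friends already set ZF and SF, so `add eax, edx; test eax, eax; jne L` can shrink to `add eax, edx; jne L`; on ARM64 the flag-setting forms (`adds`, `subs`, `ands`) play the same role. Doing this once in lowering, as explicit SETCC/JCC nodes, lets codegen emit the setcc/branch directly instead of re-deducing the flags contract from GTF_ZSF_SET/GTF_USE_FLAGS bits in each backend.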