From 9adb98520dbfd0e7790e232bd042799557b2c719 Mon Sep 17 00:00:00 2001
From: Mike Danes
Date: Mon, 4 Jun 2018 00:50:17 +0300
Subject: Cleanup LOCKADD handling

LOCKADD nodes are generated rather early and there's no reason for that:

* The CORINFO_INTRINSIC_InterlockedAdd32/64 intrinsics are not actually used.
  Even if they were used, we could still import them as XADD nodes and rely on
  lowering to generate LOCKADD when needed.
* gtExtractSideEffList transforms XADD into LOCKADD but this can be done in
  lowering. LOCKADD is an XARCH specific optimization after all.

Additionally:

* Avoid the need for special handling in LSRA by making GT_LOCKADD a "no value" oper.
* Split LOCKADD codegen from XADD/XCHG codegen; attempting to use the same code
  for all 3 just makes things more complex.
* The address is always in a register so there's no real need to create an indir
  node on the fly; the relevant emitter functions can be called directly.

The last point above is actually a CQ issue: we always generate `add [reg], imm`,
more complex address modes are not used. Unfortunately this problem starts early,
when the importer spills the address to a local variable. If that ever gets fixed
then we could probably generate a contained LEA in lowering. (An illustrative
sketch of the `lock add` vs `xadd` distinction follows the patch.)
---
 src/jit/codegen.h          |   3 ++
 src/jit/codegenarm64.cpp   |  11 +++--
 src/jit/codegenarmarch.cpp |   1 -
 src/jit/codegenxarch.cpp   | 109 ++++++++++++++++++++++++++-------------------
 src/jit/gentree.cpp        |   9 ----
 src/jit/gentree.h          |   2 +-
 src/jit/gtlist.h           |   2 +-
 src/jit/importer.cpp       |   8 ++--
 src/jit/lower.cpp          |  18 ++++++--
 src/jit/lsraxarch.cpp      |  29 +++---------
 src/jit/optimizer.cpp      |   1 +
 src/jit/rationalize.cpp    |   4 +-
 src/jit/valuenum.cpp       |   1 +
 13 files changed, 103 insertions(+), 95 deletions(-)

diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index d6cd23a4c9..450a372c54 100644
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -121,6 +121,9 @@ private:
     void genRangeCheck(GenTree* node);
 
     void genLockedInstructions(GenTreeOp* node);
+#ifdef _TARGET_XARCH_
+    void genCodeForLockAdd(GenTreeOp* node);
+#endif
 
     //-------------------------------------------------------------------------
     // Register-related methods
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index eaaf5ba1a8..0cc38f7304 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2650,8 +2650,12 @@ void CodeGen::genJumpTable(GenTree* treeNode)
     genProduceReg(treeNode);
 }
 
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+//    treeNode - the GT_XADD/XCHG node
+//
 void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
 {
     GenTree* data = treeNode->gtOp.gtOp2;
@@ -2701,7 +2705,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
     // Emit code like this:
     //   retry:
     //     ldxr loadReg, [addrReg]
-    //     add storeDataReg, loadReg, dataReg  # Only for GT_XADD & GT_LOCKADD
+    //     add storeDataReg, loadReg, dataReg  # Only for GT_XADD
     //                                         # GT_XCHG storeDataReg === dataReg
     //     stxr exResult, storeDataReg, [addrReg]
     //     cbnz exResult, retry
@@ -2718,7 +2722,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
     switch (treeNode->OperGet())
     {
         case GT_XADD:
-        case GT_LOCKADD:
             if (data->isContainedIntOrIImmed())
             {
                 // Even though INS_add is specified here, the encoder will choose either
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 5a2b55525d..e7209a8b93 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -340,7 +340,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
 #ifdef _TARGET_ARM64_
-        case GT_LOCKADD:
         case GT_XCHG:
         case GT_XADD:
             genLockedInstructions(treeNode->AsOp());
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index a98c9e3727..84999e5649 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1800,6 +1800,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             break;
 
         case GT_LOCKADD:
+            genCodeForLockAdd(treeNode->AsOp());
+            break;
+
         case GT_XCHG:
         case GT_XADD:
             genLockedInstructions(treeNode->AsOp());
@@ -3513,64 +3516,78 @@ void CodeGen::genJumpTable(GenTree* treeNode)
     genProduceReg(treeNode);
 }
 
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
+//------------------------------------------------------------------------
+// genCodeForLockAdd: Generate code for a GT_LOCKADD node
+//
+// Arguments:
+//    node - the GT_LOCKADD node
+//
+void CodeGen::genCodeForLockAdd(GenTreeOp* node)
 {
-    GenTree*    data      = treeNode->gtOp.gtOp2;
-    GenTree*    addr      = treeNode->gtOp.gtOp1;
-    regNumber   targetReg = treeNode->gtRegNum;
-    regNumber   dataReg   = data->gtRegNum;
-    regNumber   addrReg   = addr->gtRegNum;
-    var_types   type      = genActualType(data->TypeGet());
-    instruction ins;
+    assert(node->OperIs(GT_LOCKADD));
 
-    // The register allocator should have extended the lifetime of the address
-    // so that it is not used as the target.
-    noway_assert(addrReg != targetReg);
+    GenTree* addr = node->gtGetOp1();
+    GenTree* data = node->gtGetOp2();
+    emitAttr size = emitTypeSize(data->TypeGet());
 
-    // If data is a lclVar that's not a last use, we'd better have allocated a register
-    // for the result (except in the case of GT_LOCKADD which does not produce a register result).
-    assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
-           (data->gtFlags & GTF_VAR_DEATH) != 0);
+    assert(addr->isUsedFromReg());
+    assert(data->isUsedFromReg() || data->isContainedIntOrIImmed());
+    assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
 
-    genConsumeOperands(treeNode);
-    if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
-    {
-        inst_RV_RV(ins_Copy(type), targetReg, dataReg);
-        data->gtRegNum = targetReg;
+    genConsumeOperands(node);
+    instGen(INS_lock);
 
-        // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
-        // original gtRegNum on data, after calling emitInsBinary below.
+    if (data->isContainedIntOrIImmed())
+    {
+        int imm = static_cast<int>(data->AsIntCon()->IconValue());
+        assert(imm == data->AsIntCon()->IconValue());
+        getEmitter()->emitIns_I_AR(INS_add, size, imm, addr->gtRegNum, 0);
     }
-    switch (treeNode->OperGet())
+    else
     {
-        case GT_LOCKADD:
-            instGen(INS_lock);
-            ins = INS_add;
-            break;
-        case GT_XCHG:
-            // lock is implied by xchg
-            ins = INS_xchg;
-            break;
-        case GT_XADD:
-            instGen(INS_lock);
-            ins = INS_xadd;
-            break;
-        default:
-            unreached();
+        getEmitter()->emitIns_AR_R(INS_add, size, data->gtRegNum, addr->gtRegNum, 0);
     }
+}
 
-    // all of these nodes implicitly do an indirection on op1
-    // so create a temporary node to feed into the pattern matching
-    GenTreeIndir i = indirForm(type, addr);
-    i.SetContained();
-    getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+//    node - the GT_XADD/XCHG node
+//
+void CodeGen::genLockedInstructions(GenTreeOp* node)
+{
+    assert(node->OperIs(GT_XADD, GT_XCHG));
 
-    if (treeNode->gtRegNum != REG_NA)
+    GenTree* addr = node->gtGetOp1();
+    GenTree* data = node->gtGetOp2();
+    emitAttr size = emitTypeSize(node->TypeGet());
+
+    assert(addr->isUsedFromReg());
+    assert(data->isUsedFromReg());
+    assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
+
+    genConsumeOperands(node);
+
+    if (node->gtRegNum != data->gtRegNum)
     {
-        genProduceReg(treeNode);
+        // If the destination register is different from the data register then we need
+        // to first move the data to the target register. Make sure we don't overwrite
+        // the address, the register allocator should have taken care of this.
+        assert(node->gtRegNum != addr->gtRegNum);
+        getEmitter()->emitIns_R_R(INS_mov, size, node->gtRegNum, data->gtRegNum);
+    }
+
+    instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg;
+
+    // XCHG has an implied lock prefix when the first operand is a memory operand.
+    if (ins != INS_xchg)
+    {
+        instGen(INS_lock);
     }
+
+    getEmitter()->emitIns_AR_R(ins, size, node->gtRegNum, addr->gtRegNum, 0);
+    genProduceReg(node);
 }
 
 //------------------------------------------------------------------------
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 5c0ab71196..f124a723f9 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -15165,15 +15165,6 @@ void Compiler::gtExtractSideEffList(GenTree* expr,
 
         if (oper == GT_LOCKADD || oper == GT_XADD || oper == GT_XCHG || oper == GT_CMPXCHG)
         {
-            // XADD both adds to the memory location and also fetches the old value. If we only need the side
-            // effect of this instruction, change it into a GT_LOCKADD node (the add only)
-            if (oper == GT_XADD)
-            {
-                expr->SetOperRaw(GT_LOCKADD);
-                assert(genActualType(expr->gtType) == genActualType(expr->gtOp.gtOp2->gtType));
-                expr->gtType = TYP_VOID;
-            }
-
             // These operations are kind of important to keep
             *pList = gtBuildCommaList(*pList, expr);
             return;
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 5723b060f1..905022ca65 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -980,7 +980,7 @@ public:
         if (gtType == TYP_VOID)
        {
            // These are the only operators which can produce either VOID or non-VOID results.
-            assert(OperIs(GT_NOP, GT_CALL, GT_LOCKADD, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
+            assert(OperIs(GT_NOP, GT_CALL, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
                    OperIsSIMD() || OperIsHWIntrinsic());
             return false;
         }
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index dcfe3ba09c..bbd9bcbcd1 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -53,7 +53,7 @@ GTNODE(RELOAD           , GenTreeCopyOrReload ,0,GTK_UNOP)
 GTNODE(ARR_LENGTH       , GenTreeArrLen       ,0,GTK_UNOP|GTK_EXOP)      // array-length
 GTNODE(INTRINSIC        , GenTreeIntrinsic    ,0,GTK_BINOP|GTK_EXOP)     // intrinsics
 
-GTNODE(LOCKADD          , GenTreeOp           ,0,GTK_BINOP)
+GTNODE(LOCKADD          , GenTreeOp           ,0,GTK_BINOP|GTK_NOVALUE)
 GTNODE(XADD             , GenTreeOp           ,0,GTK_BINOP)
 GTNODE(XCHG             , GenTreeOp           ,0,GTK_BINOP)
 GTNODE(CMPXCHG          , GenTreeCmpXchg      ,0,GTK_SPECIAL)
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 3e705a8ae1..13320a07fb 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -3451,9 +3451,11 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
 
 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
         // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
+
+        // Note that CORINFO_INTRINSIC_InterlockedAdd32/64 are not actually used.
+        // Anyway, we can import them as XADD and leave it to lowering/codegen to perform
+        // whatever optimizations may arise from the fact that result value is not used.
         case CORINFO_INTRINSIC_InterlockedAdd32:
-            interlockedOperator = GT_LOCKADD;
-            goto InterlockedBinOpCommon;
         case CORINFO_INTRINSIC_InterlockedXAdd32:
             interlockedOperator = GT_XADD;
             goto InterlockedBinOpCommon;
@@ -3463,8 +3465,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
 
 #ifdef _TARGET_64BIT_
         case CORINFO_INTRINSIC_InterlockedAdd64:
-            interlockedOperator = GT_LOCKADD;
-            goto InterlockedBinOpCommon;
         case CORINFO_INTRINSIC_InterlockedXAdd64:
             interlockedOperator = GT_XADD;
             goto InterlockedBinOpCommon;
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 23015e0256..aa3e2f03d9 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -305,16 +305,28 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;
         }
 
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_)
         case GT_CMPXCHG:
             CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
             break;
 
         case GT_XADD:
-#endif
-        case GT_LOCKADD:
             CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
             break;
+#elif defined(_TARGET_XARCH_)
+        case GT_XADD:
+            if (node->IsUnusedValue())
+            {
+                node->ClearUnusedValue();
+                // Make sure the types are identical, since the node type is changed to VOID
+                // CodeGen relies on op2's type to determine the instruction size.
+                assert(node->gtGetOp2()->TypeGet() == node->TypeGet());
+                node->SetOper(GT_LOCKADD);
+                node->gtType = TYP_VOID;
+                CheckImmedAndMakeContained(node, node->gtGetOp2());
+            }
+            break;
+#endif
 
         default:
             break;
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 96af2f6d8a..943c4d3edc 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -450,7 +450,6 @@ int LinearScan::BuildNode(GenTree* tree)
 
         case GT_XADD:
         case GT_XCHG:
-        case GT_LOCKADD:
         {
             // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
             // to special case them.
@@ -462,29 +461,10 @@ int LinearScan::BuildNode(GenTree* tree)
             RefPosition* addrUse = BuildUse(addr);
             setDelayFree(addrUse);
             tgtPrefUse = addrUse;
-            srcCount   = 1;
-            dstCount   = 1;
-            if (!data->isContained())
-            {
-                RefPosition* dataUse = dataUse = BuildUse(data);
-                srcCount              = 2;
-            }
-
-            if (tree->TypeGet() == TYP_VOID)
-            {
-                // Right now a GT_XADD node could be morphed into a
-                // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
-                // Note that it is advantageous to use GT_LOCKADD
-                // instead of of GT_XADD as the former uses lock.add,
-                // which allows its second operand to be a contained
-                // immediate wheres xadd instruction requires its
-                // second operand to be in a register.
-                // Give it an artificial type and mark it as an unused value.
-                // This results in a Def position created but not considered consumed by its parent node.
-                tree->gtType  = TYP_INT;
-                isLocalDefUse = true;
-                tree->SetUnusedValue();
-            }
+            assert(!data->isContained());
+            BuildUse(data);
+            srcCount = 2;
+            assert(dstCount == 1);
             BuildDef(tree);
         }
         break;
@@ -771,6 +751,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
         case GT_STORE_BLK:
         case GT_STORE_OBJ:
         case GT_SWITCH_TABLE:
+        case GT_LOCKADD:
 #ifdef _TARGET_X86_
         case GT_LONG:
 #endif
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 11e956b17d..5dbc404cf5 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -7719,6 +7719,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
                 case GT_XCHG:    // Binop
                 case GT_CMPXCHG: // Specialop
                 {
+                    assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
                     memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
                 }
                 break;
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index a08d042ed9..ad99a27ddf 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -921,8 +921,8 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parents)
-            assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP));
+            // These nodes should not be present in HIR.
+            assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD));
             break;
         }
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index 306c861a79..6bf8429abe 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -7052,6 +7052,7 @@ void Compiler::fgValueNumberTree(GenTree* tree, bool evalAsgLhsInd)
             case GT_LOCKADD: // Binop
             case GT_XADD:    // Binop
             case GT_XCHG:    // Binop
+                assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
                 // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
                 fgMutateGcHeap(tree DEBUGARG("Interlocked intrinsic"));
                 tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
-- 
cgit v1.2.3
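
An illustrative C++ sketch (not part of the patch, and not JIT code) of the distinction the commit message relies on, assuming a typical x86/x64 compiler: when the result of an interlocked add is discarded, it can be emitted as `lock add`, which accepts an immediate addend (the case GT_LOCKADD models), while using the old value requires `lock xadd` with the addend in a register (the case GT_XADD models). Only std::atomic<int>::fetch_add is a real API here; the function names are hypothetical.

// Sketch only: shows the lock add (result unused, immediate allowed) vs
// lock xadd (result used, register addend required) distinction at source level.
#include <atomic>
#include <cstdio>

static std::atomic<int> counter{0};

// Result discarded: typically compiles to `lock add dword ptr [counter], 1`.
void IncrementOnly()
{
    counter.fetch_add(1);
}

// Result used: typically compiles to `mov eax, 1` + `lock xadd dword ptr [counter], eax`.
int IncrementAndFetchOld()
{
    return counter.fetch_add(1);
}

int main()
{
    IncrementOnly();
    std::printf("old value: %d\n", IncrementAndFetchOld()); // prints "old value: 1"
    return 0;
}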