author    Mike Danes <onemihaid@hotmail.com>  2018-06-04 00:50:17 +0300
committer Mike Danes <onemihaid@hotmail.com>  2018-06-04 21:37:48 +0300
commit    9adb98520dbfd0e7790e232bd042799557b2c719 (patch)
tree      22efa5f4f1861a57eae645025c8701d5908784cb /src
parent    183113ec7a16d7bb000e880e5e1ce690aedc7715 (diff)
download  coreclr-9adb98520dbfd0e7790e232bd042799557b2c719.tar.gz
          coreclr-9adb98520dbfd0e7790e232bd042799557b2c719.tar.bz2
          coreclr-9adb98520dbfd0e7790e232bd042799557b2c719.zip
Cleanup LOCKADD handling
LOCKADD nodes are generated rather early and there's no reason for that:

* The CORINFO_INTRINSIC_InterlockedAdd32/64 intrinsics are not actually used. Even if they were used, we could still import them as XADD nodes and rely on lowering to generate LOCKADD when needed.
* gtExtractSideEffList transforms XADD into LOCKADD, but this can be done in lowering; LOCKADD is an XARCH-specific optimization after all.

Additionally:

* Avoid the need for special handling in LSRA by making GT_LOCKADD a "no value" oper.
* Split LOCKADD codegen from XADD/XCHG codegen; attempting to use the same code for all 3 just makes things more complex.
* The address is always in a register, so there's no real need to create an indir node on the fly; the relevant emitter functions can be called directly.

The last point is actually a CQ issue - we always generate `add [reg], imm`; more complex address modes are not used. Unfortunately this problem starts early, when the importer spills the address to a local variable. If that ever gets fixed, we could probably generate a contained LEA in lowering.
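For context, here is a minimal standalone C++ sketch (an assumed illustration, not RyuJIT code; the counter/AddOnly/AddAndFetchOld names are hypothetical) of the distinction the change relies on: a plain locked add is enough when the old value is discarded, while xadd is needed when the old value is consumed - which is exactly the case lowering checks for before rewriting GT_XADD into GT_LOCKADD.

// Assumed illustration (not part of this patch): when the result of an atomic
// add is unused, a compiler is free to emit "lock add [mem], imm"; when the
// old value is consumed, it has to emit "lock xadd" (or equivalent) instead.
#include <atomic>
#include <cstdio>

static std::atomic<int> counter{0};

void AddOnly()
{
    // Result discarded: eligible for "lock add dword ptr [counter], 1".
    counter.fetch_add(1);
}

int AddAndFetchOld()
{
    // Old value needed: requires "lock xadd" so the previous value comes back.
    return counter.fetch_add(1);
}

int main()
{
    AddOnly();
    std::printf("%d\n", AddAndFetchOld()); // prints 1
    return 0;
}

This mirrors the lower.cpp change below: an unused-value GT_XADD is rewritten into GT_LOCKADD, whose second operand can be a contained immediate, whereas xadd requires its second operand in a register (see the comment removed from lsraxarch.cpp).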
Diffstat (limited to 'src')
-rw-r--r-- src/jit/codegen.h | 3
-rw-r--r-- src/jit/codegenarm64.cpp | 11
-rw-r--r-- src/jit/codegenarmarch.cpp | 1
-rw-r--r-- src/jit/codegenxarch.cpp | 109
-rw-r--r-- src/jit/gentree.cpp | 9
-rw-r--r-- src/jit/gentree.h | 2
-rw-r--r-- src/jit/gtlist.h | 2
-rw-r--r-- src/jit/importer.cpp | 8
-rw-r--r-- src/jit/lower.cpp | 18
-rw-r--r-- src/jit/lsraxarch.cpp | 29
-rw-r--r-- src/jit/optimizer.cpp | 1
-rw-r--r-- src/jit/rationalize.cpp | 4
-rw-r--r-- src/jit/valuenum.cpp | 1
13 files changed, 103 insertions, 95 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index d6cd23a4c9..450a372c54 100644
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -121,6 +121,9 @@ private:
void genRangeCheck(GenTree* node);
void genLockedInstructions(GenTreeOp* node);
+#ifdef _TARGET_XARCH_
+ void genCodeForLockAdd(GenTreeOp* node);
+#endif
//-------------------------------------------------------------------------
// Register-related methods
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index eaaf5ba1a8..0cc38f7304 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2650,8 +2650,12 @@ void CodeGen::genJumpTable(GenTree* treeNode)
genProduceReg(treeNode);
}
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+// treeNode - the GT_XADD/XCHG node
+//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
GenTree* data = treeNode->gtOp.gtOp2;
@@ -2701,7 +2705,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
// Emit code like this:
// retry:
// ldxr loadReg, [addrReg]
- // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD
+ // add storeDataReg, loadReg, dataReg # Only for GT_XADD
// # GT_XCHG storeDataReg === dataReg
// stxr exResult, storeDataReg, [addrReg]
// cbnz exResult, retry
@@ -2718,7 +2722,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
switch (treeNode->OperGet())
{
case GT_XADD:
- case GT_LOCKADD:
if (data->isContainedIntOrIImmed())
{
// Even though INS_add is specified here, the encoder will choose either
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 5a2b55525d..e7209a8b93 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -340,7 +340,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
break;
#ifdef _TARGET_ARM64_
- case GT_LOCKADD:
case GT_XCHG:
case GT_XADD:
genLockedInstructions(treeNode->AsOp());
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index a98c9e3727..84999e5649 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -1800,6 +1800,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
break;
case GT_LOCKADD:
+ genCodeForLockAdd(treeNode->AsOp());
+ break;
+
case GT_XCHG:
case GT_XADD:
genLockedInstructions(treeNode->AsOp());
@@ -3513,64 +3516,78 @@ void CodeGen::genJumpTable(GenTree* treeNode)
genProduceReg(treeNode);
}
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
+//------------------------------------------------------------------------
+// genCodeForLockAdd: Generate code for a GT_LOCKADD node
+//
+// Arguments:
+// node - the GT_LOCKADD node
+//
+void CodeGen::genCodeForLockAdd(GenTreeOp* node)
{
- GenTree* data = treeNode->gtOp.gtOp2;
- GenTree* addr = treeNode->gtOp.gtOp1;
- regNumber targetReg = treeNode->gtRegNum;
- regNumber dataReg = data->gtRegNum;
- regNumber addrReg = addr->gtRegNum;
- var_types type = genActualType(data->TypeGet());
- instruction ins;
+ assert(node->OperIs(GT_LOCKADD));
- // The register allocator should have extended the lifetime of the address
- // so that it is not used as the target.
- noway_assert(addrReg != targetReg);
+ GenTree* addr = node->gtGetOp1();
+ GenTree* data = node->gtGetOp2();
+ emitAttr size = emitTypeSize(data->TypeGet());
- // If data is a lclVar that's not a last use, we'd better have allocated a register
- // for the result (except in the case of GT_LOCKADD which does not produce a register result).
- assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
- (data->gtFlags & GTF_VAR_DEATH) != 0);
+ assert(addr->isUsedFromReg());
+ assert(data->isUsedFromReg() || data->isContainedIntOrIImmed());
+ assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
- genConsumeOperands(treeNode);
- if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
- {
- inst_RV_RV(ins_Copy(type), targetReg, dataReg);
- data->gtRegNum = targetReg;
+ genConsumeOperands(node);
+ instGen(INS_lock);
- // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
- // original gtRegNum on data, after calling emitInsBinary below.
+ if (data->isContainedIntOrIImmed())
+ {
+ int imm = static_cast<int>(data->AsIntCon()->IconValue());
+ assert(imm == data->AsIntCon()->IconValue());
+ getEmitter()->emitIns_I_AR(INS_add, size, imm, addr->gtRegNum, 0);
}
- switch (treeNode->OperGet())
+ else
{
- case GT_LOCKADD:
- instGen(INS_lock);
- ins = INS_add;
- break;
- case GT_XCHG:
- // lock is implied by xchg
- ins = INS_xchg;
- break;
- case GT_XADD:
- instGen(INS_lock);
- ins = INS_xadd;
- break;
- default:
- unreached();
+ getEmitter()->emitIns_AR_R(INS_add, size, data->gtRegNum, addr->gtRegNum, 0);
}
+}
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(type, addr);
- i.SetContained();
- getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node.
+//
+// Arguments:
+// node - the GT_XADD/XCHG node
+//
+void CodeGen::genLockedInstructions(GenTreeOp* node)
+{
+ assert(node->OperIs(GT_XADD, GT_XCHG));
- if (treeNode->gtRegNum != REG_NA)
+ GenTree* addr = node->gtGetOp1();
+ GenTree* data = node->gtGetOp2();
+ emitAttr size = emitTypeSize(node->TypeGet());
+
+ assert(addr->isUsedFromReg());
+ assert(data->isUsedFromReg());
+ assert((size == EA_4BYTE) || (size == EA_PTRSIZE));
+
+ genConsumeOperands(node);
+
+ if (node->gtRegNum != data->gtRegNum)
{
- genProduceReg(treeNode);
+ // If the destination register is different from the data register then we need
+ // to first move the data to the target register. Make sure we don't overwrite
+ // the address; the register allocator should have taken care of this.
+ assert(node->gtRegNum != addr->gtRegNum);
+ getEmitter()->emitIns_R_R(INS_mov, size, node->gtRegNum, data->gtRegNum);
+ }
+
+ instruction ins = node->OperIs(GT_XADD) ? INS_xadd : INS_xchg;
+
+ // XCHG has an implied lock prefix when the first operand is a memory operand.
+ if (ins != INS_xchg)
+ {
+ instGen(INS_lock);
}
+
+ getEmitter()->emitIns_AR_R(ins, size, node->gtRegNum, addr->gtRegNum, 0);
+ genProduceReg(node);
}
//------------------------------------------------------------------------
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 5c0ab71196..f124a723f9 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -15165,15 +15165,6 @@ void Compiler::gtExtractSideEffList(GenTree* expr,
if (oper == GT_LOCKADD || oper == GT_XADD || oper == GT_XCHG || oper == GT_CMPXCHG)
{
- // XADD both adds to the memory location and also fetches the old value. If we only need the side
- // effect of this instruction, change it into a GT_LOCKADD node (the add only)
- if (oper == GT_XADD)
- {
- expr->SetOperRaw(GT_LOCKADD);
- assert(genActualType(expr->gtType) == genActualType(expr->gtOp.gtOp2->gtType));
- expr->gtType = TYP_VOID;
- }
-
// These operations are kind of important to keep
*pList = gtBuildCommaList(*pList, expr);
return;
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 5723b060f1..905022ca65 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -980,7 +980,7 @@ public:
if (gtType == TYP_VOID)
{
// These are the only operators which can produce either VOID or non-VOID results.
- assert(OperIs(GT_NOP, GT_CALL, GT_LOCKADD, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
+ assert(OperIs(GT_NOP, GT_CALL, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
OperIsSIMD() || OperIsHWIntrinsic());
return false;
}
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index dcfe3ba09c..bbd9bcbcd1 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -53,7 +53,7 @@ GTNODE(RELOAD , GenTreeCopyOrReload,0,GTK_UNOP)
GTNODE(ARR_LENGTH , GenTreeArrLen ,0,GTK_UNOP|GTK_EXOP) // array-length
GTNODE(INTRINSIC , GenTreeIntrinsic ,0,GTK_BINOP|GTK_EXOP) // intrinsics
-GTNODE(LOCKADD , GenTreeOp ,0,GTK_BINOP)
+GTNODE(LOCKADD , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE)
GTNODE(XADD , GenTreeOp ,0,GTK_BINOP)
GTNODE(XCHG , GenTreeOp ,0,GTK_BINOP)
GTNODE(CMPXCHG , GenTreeCmpXchg ,0,GTK_SPECIAL)
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 3e705a8ae1..13320a07fb 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -3451,9 +3451,11 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
// TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
+
+ // Note that CORINFO_INTRINSIC_InterlockedAdd32/64 are not actually used.
+ // Anyway, we can import them as XADD and leave it to lowering/codegen to perform
+ // whatever optimizations may arise from the fact that the result value is not used.
case CORINFO_INTRINSIC_InterlockedAdd32:
- interlockedOperator = GT_LOCKADD;
- goto InterlockedBinOpCommon;
case CORINFO_INTRINSIC_InterlockedXAdd32:
interlockedOperator = GT_XADD;
goto InterlockedBinOpCommon;
@@ -3463,8 +3465,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
#ifdef _TARGET_64BIT_
case CORINFO_INTRINSIC_InterlockedAdd64:
- interlockedOperator = GT_LOCKADD;
- goto InterlockedBinOpCommon;
case CORINFO_INTRINSIC_InterlockedXAdd64:
interlockedOperator = GT_XADD;
goto InterlockedBinOpCommon;
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 23015e0256..aa3e2f03d9 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -305,16 +305,28 @@ GenTree* Lowering::LowerNode(GenTree* node)
break;
}
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_)
case GT_CMPXCHG:
CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
break;
case GT_XADD:
-#endif
- case GT_LOCKADD:
CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
break;
+#elif defined(_TARGET_XARCH_)
+ case GT_XADD:
+ if (node->IsUnusedValue())
+ {
+ node->ClearUnusedValue();
+ // Make sure the types are identical, since the node type is changed to VOID
+ // CodeGen relies on op2's type to determine the instruction size.
+ assert(node->gtGetOp2()->TypeGet() == node->TypeGet());
+ node->SetOper(GT_LOCKADD);
+ node->gtType = TYP_VOID;
+ CheckImmedAndMakeContained(node, node->gtGetOp2());
+ }
+ break;
+#endif
default:
break;
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 96af2f6d8a..943c4d3edc 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -450,7 +450,6 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_XADD:
case GT_XCHG:
- case GT_LOCKADD:
{
// TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
// to special case them.
@@ -462,29 +461,10 @@ int LinearScan::BuildNode(GenTree* tree)
RefPosition* addrUse = BuildUse(addr);
setDelayFree(addrUse);
tgtPrefUse = addrUse;
- srcCount = 1;
- dstCount = 1;
- if (!data->isContained())
- {
- RefPosition* dataUse = dataUse = BuildUse(data);
- srcCount = 2;
- }
-
- if (tree->TypeGet() == TYP_VOID)
- {
- // Right now a GT_XADD node could be morphed into a
- // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
- // Note that it is advantageous to use GT_LOCKADD
- // instead of of GT_XADD as the former uses lock.add,
- // which allows its second operand to be a contained
- // immediate wheres xadd instruction requires its
- // second operand to be in a register.
- // Give it an artificial type and mark it as an unused value.
- // This results in a Def position created but not considered consumed by its parent node.
- tree->gtType = TYP_INT;
- isLocalDefUse = true;
- tree->SetUnusedValue();
- }
+ assert(!data->isContained());
+ BuildUse(data);
+ srcCount = 2;
+ assert(dstCount == 1);
BuildDef(tree);
}
break;
@@ -771,6 +751,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_SWITCH_TABLE:
+ case GT_LOCKADD:
#ifdef _TARGET_X86_
case GT_LONG:
#endif
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 11e956b17d..5dbc404cf5 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -7719,6 +7719,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
case GT_XCHG: // Binop
case GT_CMPXCHG: // Specialop
{
+ assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index a08d042ed9..ad99a27ddf 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -921,8 +921,8 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
#endif // FEATURE_SIMD
default:
- // JCMP, CMP, SETCC and JCC nodes should not be present in HIR.
- assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP));
+ // These nodes should not be present in HIR.
+ assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD));
break;
}
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index 306c861a79..6bf8429abe 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -7052,6 +7052,7 @@ void Compiler::fgValueNumberTree(GenTree* tree, bool evalAsgLhsInd)
case GT_LOCKADD: // Binop
case GT_XADD: // Binop
case GT_XCHG: // Binop
+ assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering");
// For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
fgMutateGcHeap(tree DEBUGARG("Interlocked intrinsic"));
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));