author    Brian Sullivan <briansul@microsoft.com>    2017-10-06 10:59:39 -0700
committer GitHub <noreply@github.com>    2017-10-06 10:59:39 -0700
commit    77b5ee4f53eb6a098898afbba1c8d85cb9a632d8 (patch)
tree      e1054af07cdc382b1b36ce114b83ab215c930e9a /src
parent    573796d3727e474e7075d5d4184faa1d36a502d1 (diff)
parent    f6c7baa1a34a2542c7d9c4946a486f2cef78c49b (diff)
Merge pull request #14329 from sdmaclea/PR-ARM64-ATOMIC-OPS
[Arm64] Implement GT_XADD, GT_XCHG, GT_CMPXCHG ...
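
For reference, a minimal sketch of what the three node kinds implemented here mean, written with std::atomic for the semantics only; the JIT emits the machine code for these operations directly:

    #include <atomic>

    int main()
    {
        std::atomic<long> x{0};
        long prev1 = x.fetch_add(5);             // GT_XADD / GT_LOCKADD: atomic add, returns the prior value
        long prev2 = x.exchange(42);             // GT_XCHG: atomic swap, returns the prior value
        long expected = 42;
        x.compare_exchange_strong(expected, 7);  // GT_CMPXCHG: compare-and-swap against a comparand
        (void)prev1;
        (void)prev2;
        return 0;
    }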
Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenarm64.cpp    224
-rw-r--r--  src/jit/codegenarmarch.cpp    5
-rw-r--r--  src/jit/gentree.cpp           2
-rw-r--r--  src/jit/importer.cpp         12
-rw-r--r--  src/jit/lower.cpp             7
-rw-r--r--  src/jit/lowerarmarch.cpp      3
-rw-r--r--  src/jit/lsraarm64.cpp        42
7 files changed, 243 insertions(+), 52 deletions(-)
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 9c238f2c4f..ca0c39f6f3 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2665,62 +2665,208 @@ void CodeGen::genJumpTable(GenTree* treeNode)
// GT_LOCKADD, GT_XCHG, GT_XADD
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
-#if 0
- GenTree* data = treeNode->gtOp.gtOp2;
- GenTree* addr = treeNode->gtOp.gtOp1;
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
regNumber targetReg = treeNode->gtRegNum;
regNumber dataReg = data->gtRegNum;
regNumber addrReg = addr->gtRegNum;
- instruction ins;
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- genConsumeReg(addr);
+ regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
+ regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT);
+ regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg;
- // The register allocator should have extended the lifetime of the address
- // so that it is not used as the target.
+ // Check allocator assumptions
+ //
+ // The register allocator should have extended the lifetimes of all input and internal registers so that
+ // none interfere with the target.
noway_assert(addrReg != targetReg);
- // If data is a lclVar that's not a last use, we'd better have allocated a register
- // for the result (except in the case of GT_LOCKADD which does not produce a register result).
- assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0);
+ noway_assert(addrReg != loadReg);
+ noway_assert(dataReg != loadReg);
- genConsumeIfReg(data);
- if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
- {
- inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
- data->gtRegNum = targetReg;
+ noway_assert(addrReg != storeDataReg);
+ noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg));
+
+ assert(addr->isUsedFromReg());
+ noway_assert(exResultReg != REG_NA);
+ noway_assert(exResultReg != targetReg);
+ noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG));
+
+ // Store exclusive unpredictable cases must be avoided
+ noway_assert(exResultReg != storeDataReg);
+ noway_assert(exResultReg != addrReg);
+
+ genConsumeAddress(addr);
+ genConsumeRegs(data);
+
+ // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
+ // die at the first instruction generated by the node. This is not the case for these atomics as the input
+ // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
+ // we are finished generating the code for this node.
+
+ gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
+
+ // TODO-ARM64-CQ Use ARMv8.1 atomics if available
+ // https://github.com/dotnet/coreclr/issues/11881
+
+ // Emit code like this:
+ // retry:
+ // ldxr loadReg, [addrReg]
+ // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD
+ // # GT_XCHG storeDataReg === dataReg
+ // stxr exResult, storeDataReg, [addrReg]
+ // cbnz exResult, retry
+
+ BasicBlock* labelRetry = genCreateTempLabel();
+ genDefineTempLabel(labelRetry);
+
+ // The following instruction includes an acquire half barrier
+ // TODO-ARM64-CQ Evaluate whether this is necessary
+ // https://github.com/dotnet/coreclr/issues/14346
+ getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), loadReg, addrReg);
- // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
- // original gtRegNum on data, after calling emitInsBinary below.
- }
switch (treeNode->OperGet())
{
- case GT_LOCKADD:
- instGen(INS_lock);
- ins = INS_add;
- break;
- case GT_XCHG:
- // lock is implied by xchg
- ins = INS_xchg;
- break;
- case GT_XADD:
- instGen(INS_lock);
- ins = INS_xadd;
- break;
- default:
- unreached();
+ case GT_XADD:
+ case GT_LOCKADD:
+ if (data->isContainedIntOrIImmed())
+ {
+ // Even though INS_add is specified here, the encoder will choose either
+ // an INS_add or an INS_sub and encode the immediate as a positive value
+ genInstrWithConstant(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg,
+ data->AsIntConCommon()->IconValue(), REG_NA);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_R(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg, dataReg);
+ }
+ break;
+ case GT_XCHG:
+ assert(!data->isContained());
+ storeDataReg = dataReg;
+ break;
+ default:
+ unreached();
}
- getEmitter()->emitInsBinary(ins, emitActualTypeSize(data), &i, data);
+
+ // The following instruction includes a release half barrier
+ // TODO-ARM64-CQ Evaluate whether this is necessary
+ // https://github.com/dotnet/coreclr/issues/14346
+ getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, storeDataReg, addrReg);
+
+ getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
+
+ gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
if (treeNode->gtRegNum != REG_NA)
{
genProduceReg(treeNode);
}
-#else // !0
- NYI("genLockedInstructions");
-#endif // !0
+}
+
+//------------------------------------------------------------------------
+// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node.
+//
+// Arguments:
+// tree - the GT_CMPXCHG node
+//
+void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
+{
+ assert(treeNode->OperIs(GT_CMPXCHG));
+
+ GenTreePtr addr = treeNode->gtOpLocation; // arg1
+ GenTreePtr data = treeNode->gtOpValue; // arg2
+ GenTreePtr comparand = treeNode->gtOpComparand; // arg3
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ regNumber comparandReg = comparand->gtRegNum;
+ regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT);
+
+ // Check allocator assumptions
+ //
+ // The register allocator should have extended the lifetimes of all input and internal registers so that
+ // none interfere with the target.
+ noway_assert(addrReg != targetReg);
+ noway_assert(dataReg != targetReg);
+ noway_assert(comparandReg != targetReg);
+ noway_assert(addrReg != dataReg);
+ noway_assert(targetReg != REG_NA);
+ noway_assert(exResultReg != REG_NA);
+ noway_assert(exResultReg != targetReg);
+
+ assert(addr->isUsedFromReg());
+ assert(data->isUsedFromReg());
+ assert(!comparand->isUsedFromMemory());
+
+ // Store exclusive unpredictable cases must be avoided
+ noway_assert(exResultReg != dataReg);
+ noway_assert(exResultReg != addrReg);
+
+ genConsumeAddress(addr);
+ genConsumeRegs(data);
+ genConsumeRegs(comparand);
+
+ // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers
+ // die at the first instruction generated by the node. This is not the case for these atomics as the input
+ // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until
+ // we are finished generating the code for this node.
+
+ gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());
+
+ // TODO-ARM64-CQ Use ARMv8.1 atomics if available
+ // https://github.com/dotnet/coreclr/issues/11881
+
+ // Emit code like this:
+ // retry:
+ // ldxr targetReg, [addrReg]
+ // cmp targetReg, comparandReg
+ // bne compareFail
+ // stxr exResult, dataReg, [addrReg]
+ // cbnz exResult, retry
+ // compareFail:
+
+ BasicBlock* labelRetry = genCreateTempLabel();
+ BasicBlock* labelCompareFail = genCreateTempLabel();
+ genDefineTempLabel(labelRetry);
+
+ // The following instruction includes an acquire half barrier
+ // TODO-ARM64-CQ Evaluate whether this is necessary
+ // https://github.com/dotnet/coreclr/issues/14346
+ getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg);
+
+ if (comparand->isContainedIntOrIImmed())
+ {
+ if (comparand->IsIntegralConst(0))
+ {
+ getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg,
+ comparand->AsIntConCommon()->IconValue());
+ getEmitter()->emitIns_J(INS_bne, labelCompareFail);
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg);
+ getEmitter()->emitIns_J(INS_bne, labelCompareFail);
+ }
+
+ // The following instruction includes a release half barrier
+ // TODO-ARM64-CQ Evaluate whether this is necessary
+ // https://github.com/dotnet/coreclr/issues/14346
+ getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg);
+
+ getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);
+
+ genDefineTempLabel(labelCompareFail);
+
+ gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
+
+ genProduceReg(treeNode);
}
instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
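
Both genLockedInstructions and genCodeForCmpXchg above emit the same load-exclusive / store-exclusive retry shape. A minimal sketch of that loop as AArch64 inline assembly (GCC/Clang syntax; the function and operand names are illustrative, not the JIT's):

    #include <cstdint>

    int64_t cmpxchg64(int64_t* addr, int64_t value, int64_t comparand)
    {
        int64_t  observed;
        uint32_t status;
        asm volatile(
            "1:\n\t"
            "ldaxr %[obs], %[mem]\n\t"          // load-acquire exclusive
            "cmp   %[obs], %[cmp]\n\t"
            "b.ne  2f\n\t"                      // comparand mismatch: done, return what we saw
            "stlxr %w[st], %[val], %[mem]\n\t"  // store-release exclusive; nonzero status on failure
            "cbnz  %w[st], 1b\n"                // exclusive monitor lost: retry
            "2:"
            : [obs] "=&r"(observed), [st] "=&r"(status), [mem] "+Q"(*addr)
            : [val] "r"(value), [cmp] "r"(comparand)
            : "cc");
        return observed;
    }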
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 22aed1056d..535af34682 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -343,7 +343,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_CMPXCHG:
- NYI("GT_CMPXCHG");
+ NYI_ARM("GT_CMPXCHG");
+#ifdef _TARGET_ARM64_
+ genCodeForCmpXchg(treeNode->AsCmpXchg());
+#endif
break;
case GT_RELOAD:
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 30695d86c5..25ca810a85 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -15155,7 +15155,9 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
if (oper == GT_XADD)
{
expr->SetOperRaw(GT_LOCKADD);
+#ifndef _TARGET_ARM64_
expr->gtType = TYP_VOID;
+#endif
}
// These operations are kind of important to keep
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 332abbf1c7..72f747a9ff 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -3374,7 +3374,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
retNode = impMathIntrinsic(method, sig, callType, intrinsicID, tailCall);
break;
-#ifdef _TARGET_XARCH_
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
// TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
case CORINFO_INTRINSIC_InterlockedAdd32:
interlockedOperator = GT_LOCKADD;
@@ -3386,7 +3386,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
interlockedOperator = GT_XCHG;
goto InterlockedBinOpCommon;
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_64BIT_
case CORINFO_INTRINSIC_InterlockedAdd64:
interlockedOperator = GT_LOCKADD;
goto InterlockedBinOpCommon;
@@ -3419,7 +3419,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
op1->gtFlags |= GTF_GLOB_REF | GTF_ASG;
retNode = op1;
break;
-#endif // _TARGET_XARCH_
+#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
case CORINFO_INTRINSIC_MemoryBarrier:
@@ -3430,10 +3430,10 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
retNode = op1;
break;
-#ifdef _TARGET_XARCH_
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
// TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic
case CORINFO_INTRINSIC_InterlockedCmpXchg32:
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_64BIT_
case CORINFO_INTRINSIC_InterlockedCmpXchg64:
#endif
{
@@ -3451,7 +3451,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
retNode = node;
break;
}
-#endif
+#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
case CORINFO_INTRINSIC_StringLength:
op1 = impPopStack().val;
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 9ecce38c15..17202effd6 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -298,6 +298,13 @@ GenTree* Lowering::LowerNode(GenTree* node)
LowerStoreLoc(node->AsLclVarCommon());
break;
+#ifdef _TARGET_ARM64_
+ case GT_CMPXCHG:
+ CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
+ break;
+
+ case GT_XADD:
+#endif
case GT_LOCKADD:
CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
break;
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index bbc879f6b6..d253520ef0 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -108,6 +108,9 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
case GT_ADD:
case GT_SUB:
#ifdef _TARGET_ARM64_
+ case GT_CMPXCHG:
+ case GT_LOCKADD:
+ case GT_XADD:
return emitter::emitIns_valid_imm_for_add(immVal, size);
#elif defined(_TARGET_ARM_)
return emitter::emitIns_valid_imm_for_add(immVal, flags);
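
The containment cases added above reuse emitter::emitIns_valid_imm_for_add. As an assumed simplification of the rule it checks (the real emitter handles more cases): ARM64 add/sub immediates are 12-bit unsigned values, optionally shifted left by 12, and the encoder may flip add to sub to absorb a negated value, which is why genLockedInstructions notes that INS_add may be encoded as INS_sub:

    #include <cstdint>

    // Assumed simplification of emitter::emitIns_valid_imm_for_add.
    bool validImmForAddSub(int64_t imm)
    {
        uint64_t v = (imm < 0) ? ~(uint64_t)imm + 1 : (uint64_t)imm; // add <-> sub flip
        return (v & ~UINT64_C(0xFFF)) == 0           // 12-bit immediate
            || (v & ~(UINT64_C(0xFFF) << 12)) == 0;  // 12-bit immediate, LSL #12
    }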
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 83f6d64983..c67a1c50d3 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -365,16 +365,46 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree)
break;
case GT_CMPXCHG:
- info->srcCount = 3;
+ {
+ GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg();
+ info->srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3;
assert(info->dstCount == 1);
- // TODO-ARM64-NYI
- NYI("CMPXCHG");
- break;
+ info->internalIntCount = 1;
+
+ // For ARMv8 exclusives, the lifetimes of the addr and data registers must be extended
+ // because they may be used multiple times during retries
+ cmpXchgNode->gtOpLocation->gtLsraInfo.isDelayFree = true;
+ cmpXchgNode->gtOpValue->gtLsraInfo.isDelayFree = true;
+ if (!cmpXchgNode->gtOpComparand->isContained())
+ {
+ cmpXchgNode->gtOpComparand->gtLsraInfo.isDelayFree = true;
+ }
+ info->hasDelayFreeSrc = true;
+
+ // Internals may not collide with target
+ info->isInternalRegDelayFree = true;
+ }
+ break;
case GT_LOCKADD:
- info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
- assert(info->dstCount == 1);
+ case GT_XADD:
+ case GT_XCHG:
+ assert(info->dstCount == (tree->OperIs(GT_LOCKADD) ? 0 : 1));
+ info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2;
+ info->internalIntCount = (tree->OperGet() == GT_XCHG) ? 1 : 2;
+
+ // For ARMv8 exclusives, the lifetimes of the addr and data registers must be extended
+ // because they may be used multiple times during retries
+ tree->gtOp.gtOp1->gtLsraInfo.isDelayFree = true;
+ if (!tree->gtOp.gtOp2->isContained())
+ {
+ tree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true;
+ }
+ info->hasDelayFreeSrc = true;
+
+ // Internals may not collide with target
+ info->isInternalRegDelayFree = true;
break;
case GT_PUTARG_STK:
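
The delay-free marking above encodes two hazards at once: the ARM ARM makes a store-exclusive CONSTRAINED UNPREDICTABLE when its status register aliases the data or address register, and the retry loop re-reads every source on each iteration, so no source may alias the target. A compact sketch of the constraints (illustrative names, mirroring the noway_asserts in genLockedInstructions and genCodeForCmpXchg):

    // Register constraints for an ARMv8 load/store-exclusive retry loop.
    struct ExclusiveRegs
    {
        int addr;    // address register, re-read on every retry
        int data;    // source data register, re-read on every retry
        int status;  // STXR/STLXR status result register
        int dest;    // the node's target register

        bool valid() const
        {
            return status != data   // STXR with Rs == Rt is CONSTRAINED UNPREDICTABLE
                && status != addr   // STXR with Rs == Rn is CONSTRAINED UNPREDICTABLE
                && dest   != addr   // delay-free: target must not clobber a live source
                && dest   != data;
        }
    };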