diff options
author | Brian Sullivan <briansul@microsoft.com> | 2017-10-06 10:59:39 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-06 10:59:39 -0700 |
commit | 77b5ee4f53eb6a098898afbba1c8d85cb9a632d8 (patch) | |
tree | e1054af07cdc382b1b36ce114b83ab215c930e9a /src | |
parent | 573796d3727e474e7075d5d4184faa1d36a502d1 (diff) | |
parent | f6c7baa1a34a2542c7d9c4946a486f2cef78c49b (diff) | |
download | coreclr-77b5ee4f53eb6a098898afbba1c8d85cb9a632d8.tar.gz coreclr-77b5ee4f53eb6a098898afbba1c8d85cb9a632d8.tar.bz2 coreclr-77b5ee4f53eb6a098898afbba1c8d85cb9a632d8.zip |
Merge pull request #14329 from sdmaclea/PR-ARM64-ATOMIC-OPS
[Arm64] Implement GT_XADD, GT_XCHG, GT_CMPXCHG ...
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/codegenarm64.cpp | 224 | ||||
-rw-r--r-- | src/jit/codegenarmarch.cpp | 5 | ||||
-rw-r--r-- | src/jit/gentree.cpp | 2 | ||||
-rw-r--r-- | src/jit/importer.cpp | 12 | ||||
-rw-r--r-- | src/jit/lower.cpp | 7 | ||||
-rw-r--r-- | src/jit/lowerarmarch.cpp | 3 | ||||
-rw-r--r-- | src/jit/lsraarm64.cpp | 42 |
7 files changed, 243 insertions, 52 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 9c238f2c4f..ca0c39f6f3 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2665,62 +2665,208 @@ void CodeGen::genJumpTable(GenTree* treeNode) // GT_LOCKADD, GT_XCHG, GT_XADD void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { -#if 0 - GenTree* data = treeNode->gtOp.gtOp2; - GenTree* addr = treeNode->gtOp.gtOp1; + GenTree* data = treeNode->gtOp.gtOp2; + GenTree* addr = treeNode->gtOp.gtOp1; regNumber targetReg = treeNode->gtRegNum; regNumber dataReg = data->gtRegNum; regNumber addrReg = addr->gtRegNum; - instruction ins; - // all of these nodes implicitly do an indirection on op1 - // so create a temporary node to feed into the pattern matching - GenTreeIndir i = indirForm(data->TypeGet(), addr); - genConsumeReg(addr); + regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); + regNumber storeDataReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; - // The register allocator should have extended the lifetime of the address - // so that it is not used as the target. + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. noway_assert(addrReg != targetReg); - // If data is a lclVar that's not a last use, we'd better have allocated a register - // for the result (except in the case of GT_LOCKADD which does not produce a register result). 
- assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0); + noway_assert(addrReg != loadReg); + noway_assert(dataReg != loadReg); - genConsumeIfReg(data); - if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg) - { - inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg); - data->gtRegNum = targetReg; + noway_assert(addrReg != storeDataReg); + noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); + + assert(addr->isUsedFromReg()); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != storeDataReg); + noway_assert(exResultReg != addrReg); + + genConsumeAddress(addr); + genConsumeRegs(data); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. 
+ + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // TODO-ARM64-CQ Use ARMv8.1 atomics if available + // https://github.com/dotnet/coreclr/issues/11881 + + // Emit code like this: + // retry: + // ldxr loadReg, [addrReg] + // add storeDataReg, loadReg, dataReg # Only for GT_XADD & GT_LOCKADD + // # GT_XCHG storeDataReg === dataReg + // stxr exResult, storeDataReg, [addrReg] + // cbnz exResult, retry + + BasicBlock* labelRetry = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + // The following instruction includes an acquire half barrier + // TODO-ARM64-CQ Evaluate whether this is necessary + // https://github.com/dotnet/coreclr/issues/14346 + getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), loadReg, addrReg); - // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the - // original gtRegNum on data, after calling emitInsBinary below. - } switch (treeNode->OperGet()) { - case GT_LOCKADD: - instGen(INS_lock); - ins = INS_add; - break; - case GT_XCHG: - // lock is implied by xchg - ins = INS_xchg; - break; - case GT_XADD: - instGen(INS_lock); - ins = INS_xadd; - break; - default: - unreached(); + case GT_XADD: + case GT_LOCKADD: + if (data->isContainedIntOrIImmed()) + { + // Even though INS_add is specified here, the encoder will choose either + // an INS_add or an INS_sub and encode the immediate as a positive value + genInstrWithConstant(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg, + data->AsIntConCommon()->IconValue(), REG_NA); + } + else + { + getEmitter()->emitIns_R_R_R(INS_add, emitActualTypeSize(treeNode), storeDataReg, loadReg, dataReg); + } + break; + case GT_XCHG: + assert(!data->isContained()); + storeDataReg = dataReg; + break; + default: + unreached(); } - getEmitter()->emitInsBinary(ins, emitActualTypeSize(data), &i, data); + + // The following instruction includes a release half barrier + // TODO-ARM64-CQ Evaluate whether this is necessary + // 
https://github.com/dotnet/coreclr/issues/14346 + getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, storeDataReg, addrReg); + + getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); if (treeNode->gtRegNum != REG_NA) { genProduceReg(treeNode); } -#else // !0 - NYI("genLockedInstructions"); -#endif // !0 +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. +// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + assert(treeNode->OperIs(GT_CMPXCHG)); + + GenTreePtr addr = treeNode->gtOpLocation; // arg1 + GenTreePtr data = treeNode->gtOpValue; // arg2 + GenTreePtr comparand = treeNode->gtOpComparand; // arg3 + + regNumber targetReg = treeNode->gtRegNum; + regNumber dataReg = data->gtRegNum; + regNumber addrReg = addr->gtRegNum; + regNumber comparandReg = comparand->gtRegNum; + regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. 
+ noway_assert(addrReg != targetReg); + noway_assert(dataReg != targetReg); + noway_assert(comparandReg != targetReg); + noway_assert(addrReg != dataReg); + noway_assert(targetReg != REG_NA); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg()); + assert(!comparand->isUsedFromMemory()); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != dataReg); + noway_assert(exResultReg != addrReg); + + genConsumeAddress(addr); + genConsumeRegs(data); + genConsumeRegs(comparand); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // TODO-ARM64-CQ Use ARMv8.1 atomics if available + // https://github.com/dotnet/coreclr/issues/11881 + + // Emit code like this: + // retry: + // ldxr targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // stxr exResult, dataReg, [addrReg] + // cbnz exResult, retry + // compareFail: + + BasicBlock* labelRetry = genCreateTempLabel(); + BasicBlock* labelCompareFail = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + // The following instruction includes an acquire half barrier + // TODO-ARM64-CQ Evaluate whether this is necessary + // https://github.com/dotnet/coreclr/issues/14346 + getEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg); + + if (comparand->isContainedIntOrIImmed()) + { + if (comparand->IsIntegralConst(0)) + { + getEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); + } + else + { + 
getEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, + comparand->AsIntConCommon()->IconValue()); + getEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + } + else + { + getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); + getEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + + // The following instruction includes a release half barrier + // TODO-ARM64-CQ Evaluate whether this is necessary + // https://github.com/dotnet/coreclr/issues/14346 + getEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg); + + getEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); + + genDefineTempLabel(labelCompareFail); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + + genProduceReg(treeNode); } instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 22aed1056d..535af34682 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -343,7 +343,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_CMPXCHG: - NYI("GT_CMPXCHG"); + NYI_ARM("GT_CMPXCHG"); +#ifdef _TARGET_ARM64_ + genCodeForCmpXchg(treeNode->AsCmpXchg()); +#endif break; case GT_RELOAD: diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 30695d86c5..25ca810a85 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -15155,7 +15155,9 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr, if (oper == GT_XADD) { expr->SetOperRaw(GT_LOCKADD); +#ifndef _TARGET_ARM64_ expr->gtType = TYP_VOID; +#endif } // These operations are kind of important to keep diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 332abbf1c7..72f747a9ff 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -3374,7 +3374,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, retNode = impMathIntrinsic(method, sig, callType, intrinsicID, tailCall); break; -#ifdef 
_TARGET_XARCH_ +#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic case CORINFO_INTRINSIC_InterlockedAdd32: interlockedOperator = GT_LOCKADD; @@ -3386,7 +3386,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, interlockedOperator = GT_XCHG; goto InterlockedBinOpCommon; -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_64BIT_ case CORINFO_INTRINSIC_InterlockedAdd64: interlockedOperator = GT_LOCKADD; goto InterlockedBinOpCommon; @@ -3419,7 +3419,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, op1->gtFlags |= GTF_GLOB_REF | GTF_ASG; retNode = op1; break; -#endif // _TARGET_XARCH_ +#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) case CORINFO_INTRINSIC_MemoryBarrier: @@ -3430,10 +3430,10 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, retNode = op1; break; -#ifdef _TARGET_XARCH_ +#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) // TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic case CORINFO_INTRINSIC_InterlockedCmpXchg32: -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_64BIT_ case CORINFO_INTRINSIC_InterlockedCmpXchg64: #endif { @@ -3451,7 +3451,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis, retNode = node; break; } -#endif +#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) case CORINFO_INTRINSIC_StringLength: op1 = impPopStack().val; diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 9ecce38c15..17202effd6 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -298,6 +298,13 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLoc(node->AsLclVarCommon()); break; +#ifdef _TARGET_ARM64_ + case GT_CMPXCHG: + CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand); + break; + + case GT_XADD: +#endif case GT_LOCKADD: CheckImmedAndMakeContained(node, node->gtOp.gtOp2); break; diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index bbc879f6b6..d253520ef0 100644 --- 
a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -108,6 +108,9 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) case GT_ADD: case GT_SUB: #ifdef _TARGET_ARM64_ + case GT_CMPXCHG: + case GT_LOCKADD: + case GT_XADD: return emitter::emitIns_valid_imm_for_add(immVal, size); #elif defined(_TARGET_ARM_) return emitter::emitIns_valid_imm_for_add(immVal, flags); diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 83f6d64983..c67a1c50d3 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -365,16 +365,46 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree) break; case GT_CMPXCHG: - info->srcCount = 3; + { + GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); + info->srcCount = cmpXchgNode->gtOpComparand->isContained() ? 2 : 3; assert(info->dstCount == 1); - // TODO-ARM64-NYI - NYI("CMPXCHG"); - break; + info->internalIntCount = 1; + + // For ARMv8 exclusives the lifetime of the addr and data must be extended because + // it may be used multiple times during retries + cmpXchgNode->gtOpLocation->gtLsraInfo.isDelayFree = true; + cmpXchgNode->gtOpValue->gtLsraInfo.isDelayFree = true; + if (!cmpXchgNode->gtOpComparand->isContained()) + { + cmpXchgNode->gtOpComparand->gtLsraInfo.isDelayFree = true; + } + info->hasDelayFreeSrc = true; + + // Internals may not collide with target + info->isInternalRegDelayFree = true; + } + break; case GT_LOCKADD: - info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; - assert(info->dstCount == 1); + case GT_XADD: + case GT_XCHG: + assert(info->dstCount == (tree->OperIs(GT_LOCKADD) ? 0 : 1)); + info->srcCount = tree->gtOp.gtOp2->isContained() ? 1 : 2; + info->internalIntCount = (tree->OperGet() == GT_XCHG) ? 
1 : 2; + + // For ARMv8 exclusives the lifetime of the addr and data must be extended because + // it may be used multiple times during retries + tree->gtOp.gtOp1->gtLsraInfo.isDelayFree = true; + if (!tree->gtOp.gtOp2->isContained()) + { + tree->gtOp.gtOp2->gtLsraInfo.isDelayFree = true; + } + info->hasDelayFreeSrc = true; + + // Internals may not collide with target + info->isInternalRegDelayFree = true; break; case GT_PUTARG_STK: |