author    Tanner Gooding <tagoo@outlook.com>    2018-06-02 20:41:06 -0700
committer Tanner Gooding <tagoo@outlook.com>    2018-06-04 15:34:09 -0700
commit    b69128007a783429ae9113a9033fe48eba5570bc
tree      38b3276fac4eba2a1525368d1ea58b742e7200dd /src
parent    cdc42699de04995b5d1f3eedf20bf5ca6fc33267
Updating the x86 HWIntrinsics to support containment for most one-operand intrinsics.
Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenlinear.h             |   2
-rw-r--r--  src/jit/emitxarch.cpp               |  14
-rw-r--r--  src/jit/emitxarch.h                 |   4
-rw-r--r--  src/jit/hwintrinsiccodegenxarch.cpp | 275
-rw-r--r--  src/jit/lowerxarch.cpp              |  66
5 files changed, 341 insertions, 20 deletions
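
In RyuJIT terms, "containment" means folding a memory operand directly into the consuming instruction instead of first loading it into a register. The sketch below is a hypothetical, compilable C++ miniature of that difference for a one-operand intrinsic such as Sse.Sqrt; the helper and register choices are invented for illustration and are not the JIT's real API.

#include <cstdio>

// Hypothetical helper: prints the x86 a JIT might emit for sqrtps,
// with and without the operand contained.
void emitSqrt(bool operandContained)
{
    if (operandContained)
    {
        // One instruction: the load is folded into sqrtps itself.
        printf("sqrtps xmm0, [rax]\n");
    }
    else
    {
        // Two instructions: materialize the load, then operate reg-to-reg.
        printf("movups xmm1, [rax]\n");
        printf("sqrtps xmm0, xmm1\n");
    }
}

int main()
{
    emitSqrt(true);
    emitSqrt(false);
    return 0;
}
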
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 0579fa5a00..1953686310 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -115,6 +115,8 @@ void genPutArgStkSIMD12(GenTree* treeNode);
#ifdef FEATURE_HW_INTRINSICS
void genHWIntrinsic(GenTreeHWIntrinsic* node);
#if defined(_TARGET_XARCH_)
+void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
+void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_R_RM(
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 4283f1c336..6a78bbf6cc 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4024,7 +4024,7 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off
emitCurIGsize += sz;
}
-void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt)
+void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir)
{
ssize_t offs = indir->Offset();
instrDesc* id = emitNewInstrAmd(attr, offs);
@@ -4032,7 +4032,7 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre
id->idIns(ins);
id->idReg1(reg1);
- emitHandleMemOp(indir, id, fmt, ins);
+ emitHandleMemOp(indir, id, IF_RRW_ARD, ins);
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
id->idCodeSize(sz);
@@ -4160,7 +4160,7 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int
}
void emitter::emitIns_R_R_A(
- instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt)
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir)
{
assert(IsSSEOrAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
@@ -4172,7 +4172,7 @@ void emitter::emitIns_R_R_A(
id->idReg1(reg1);
id->idReg2(reg2);
- emitHandleMemOp(indir, id, fmt, ins);
+ emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
id->idCodeSize(sz);
@@ -5407,7 +5407,7 @@ void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targe
{
if (UseVEXEncoding())
{
- emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir, IF_RWR_RRD_ARD);
+ emitIns_R_R_A(ins, attr, targetReg, op1Reg, indir);
}
else
{
@@ -5415,7 +5415,7 @@ void emitter::emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targe
{
emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
- emitIns_R_A(ins, attr, targetReg, indir, IF_RRW_ARD);
+ emitIns_R_A(ins, attr, targetReg, indir);
}
}
@@ -5589,7 +5589,7 @@ void emitter::emitIns_SIMD_R_R_R_A(
emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
}
- emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir, IF_RWR_RRD_ARD);
+ emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir);
}
void emitter::emitIns_SIMD_R_R_R_AR(
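
The emitIns_SIMD_R_R_A callers above show why two paths exist: VEX encodings are non-destructive three-operand forms, while legacy SSE encodings are destructive two-operand forms that require op1 to be copied into the target register first. A compilable sketch of that dispatch, with invented names and a fixed example instruction:

#include <cstdio>

// Simplified model of emitIns_SIMD_R_R_A's VEX/legacy split (names invented).
void emitSimdRRA(bool useVex, int targetReg, int op1Reg)
{
    if (useVex)
    {
        // VEX: one non-destructive three-operand instruction.
        printf("vaddps xmm%d, xmm%d, [mem]\n", targetReg, op1Reg);
    }
    else
    {
        // Legacy SSE: destructive, so copy op1 into the target first.
        if (targetReg != op1Reg)
        {
            printf("movaps xmm%d, xmm%d\n", targetReg, op1Reg);
        }
        printf("addps xmm%d, [mem]\n", targetReg);
    }
}

int main()
{
    emitSimdRRA(true, 0, 1);
    emitSimdRRA(false, 0, 1);
    return 0;
}
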
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 663e42c535..7e7bffbbe3 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -318,7 +318,7 @@ void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg
void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs);
-void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insFormat fmt);
+void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir);
void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);
@@ -328,7 +328,7 @@ void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD
void emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival);
-void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insFormat fmt);
+void emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir);
void emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs);
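
Both header changes drop the insFormat parameter because every caller passed the same constant, so the format carries no information at the call site. A tiny, hypothetical miniature of the refactoring (the names mimic the real methods but the bodies are invented):

#include <cstdio>

enum insFormat { IF_RRW_ARD };

// Before: the caller supplied the format explicitly.
void emitIns_R_A_before(const char* ins, insFormat fmt)
{
    printf("%s (fmt=%d)\n", ins, fmt);
}

// After: the format is an implementation detail of the method.
void emitIns_R_A_after(const char* ins)
{
    const insFormat fmt = IF_RRW_ARD; // the only value callers ever passed
    printf("%s (fmt=%d)\n", ins, fmt);
}

int main()
{
    emitIns_R_A_before("sqrtps", IF_RRW_ARD);
    emitIns_R_A_after("sqrtps");
    return 0;
}
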
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 59cb48abcf..2e97be2b6f 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -84,6 +84,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
genConsumeOperands(node);
op1Reg = op1->gtRegNum;
+
if (category == HW_Category_MemoryLoad)
{
emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
@@ -95,11 +96,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
else if ((ival != -1) && varTypeIsFloating(baseType))
{
assert((ival >= 0) && (ival <= 127));
- emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, (int8_t)ival);
+ genHWIntrinsic_R_RM_I(node, ins);
}
else
{
- emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
+ genHWIntrinsic_R_RM(node, ins, simdSize);
}
break;
}
@@ -298,6 +299,263 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}
//------------------------------------------------------------------------
+// genHWIntrinsic_R_RM: Generates the code for a hardware intrinsic node that takes a
+// register/memory operand and returns a value in a register
+//
+// Arguments:
+// node - The hardware intrinsic node
+// ins - The instruction being generated
+// attr - The emit attribute for the instruction being generated
+//
+void CodeGen::genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr)
+{
+ var_types targetType = node->TypeGet();
+ regNumber targetReg = node->gtRegNum;
+ GenTree* op1 = node->gtGetOp1();
+ emitter* emit = getEmitter();
+
+ assert(targetReg != REG_NA);
+ assert(node->gtGetOp2() == nullptr);
+ assert(!node->OperIsCommutative());
+
+ if (op1->isContained() || op1->isUsedFromSpillTemp())
+ {
+ assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
+
+#if DEBUG
+ bool supportsRegOptional = false;
+ bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional);
+ assert(isContainable || (supportsRegOptional && op1->IsRegOptional()));
+#endif // DEBUG
+
+ TempDsc* tmpDsc = nullptr;
+ unsigned varNum = BAD_VAR_NUM;
+ unsigned offset = (unsigned)-1;
+
+ if (op1->isUsedFromSpillTemp())
+ {
+ assert(op1->IsRegOptional());
+
+ tmpDsc = getSpillTempDsc(op1);
+ varNum = tmpDsc->tdTempNum();
+ offset = 0;
+
+ compiler->tmpRlsTemp(tmpDsc);
+ }
+ else if (op1->OperIsHWIntrinsic())
+ {
+ emit->emitIns_R_AR(ins, attr, targetReg, op1->gtGetOp1()->gtRegNum, 0);
+ return;
+ }
+ else if (op1->isIndir())
+ {
+ GenTreeIndir* memIndir = op1->AsIndir();
+ GenTree* memBase = memIndir->gtOp1;
+
+ switch (memBase->OperGet())
+ {
+ case GT_LCL_VAR_ADDR:
+ {
+ varNum = memBase->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+
+ // Ensure that all the GenTreeIndir values are set to their defaults.
+ assert(!memIndir->HasIndex());
+ assert(memIndir->Scale() == 1);
+ assert(memIndir->Offset() == 0);
+
+ break;
+ }
+
+ case GT_CLS_VAR_ADDR:
+ {
+ emit->emitIns_R_C(ins, attr, targetReg, memBase->gtClsVar.gtClsVarHnd, 0);
+ return;
+ }
+
+ default:
+ {
+ emit->emitIns_R_A(ins, attr, targetReg, memIndir);
+ return;
+ }
+ }
+ }
+ else
+ {
+ switch (op1->OperGet())
+ {
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclField = op1->AsLclFld();
+
+ varNum = lclField->GetLclNum();
+ offset = lclField->gtLclFld.gtLclOffs;
+ break;
+ }
+
+ case GT_LCL_VAR:
+ {
+ assert(op1->IsRegOptional() || !compiler->lvaTable[op1->gtLclVar.gtLclNum].lvIsRegCandidate());
+ varNum = op1->AsLclVar()->GetLclNum();
+ offset = 0;
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ break;
+ }
+ }
+ }
+
+ // Ensure we got a good varNum and offset.
+ // We also need to check for `tmpDsc != nullptr` since spill temp numbers
+ // are negative and start with -1, which also happens to be BAD_VAR_NUM.
+ assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
+ assert(offset != (unsigned)-1);
+
+ emit->emitIns_R_S(ins, attr, targetReg, varNum, offset);
+ }
+ else
+ {
+ regNumber op1Reg = op1->gtRegNum;
+ emit->emitIns_R_R(ins, attr, targetReg, op1Reg);
+ }
+}
+
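
One subtlety in the varNum/offset bookkeeping above: BAD_VAR_NUM is (unsigned)-1, and spill-temp numbers are negative starting at -1, so the sentinel check alone cannot tell "no variable" apart from the first spill temp; hence the extra tmpDsc != nullptr test. A standalone demonstration of the collision (the sentinel value is taken from the comment in the diff):

#include <cassert>
#include <cstdio>

int main()
{
    const unsigned BAD_VAR_NUM = (unsigned)-1; // sentinel, per the comment above
    int spillTempNum = -1;                     // first spill-temp number
    unsigned varNum = (unsigned)spillTempNum;  // wraps to 0xFFFFFFFF

    assert(varNum == BAD_VAR_NUM); // collision: the sentinel check is ambiguous
    printf("varNum = 0x%X\n", varNum);
    return 0;
}
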
+//------------------------------------------------------------------------
+// genHWIntrinsic_R_RM_I: Generates the code for a hardware intrinsic node that takes a
+// register/memory operand, an immediate operand, and returns a value in a register
+//
+// Arguments:
+// node - The hardware intrinsic node
+// ins - The instruction being generated
+//
+void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
+{
+ var_types targetType = node->TypeGet();
+ regNumber targetReg = node->gtRegNum;
+ GenTree* op1 = node->gtGetOp1();
+ emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
+ emitter* emit = getEmitter();
+
+ int ival = HWIntrinsicInfo::lookupIval(node->gtHWIntrinsicId);
+ assert((ival >= 0) && (ival <= 127));
+
+ // TODO-XArch-CQ: Commutative operations can have op1 be contained
+ // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
+
+ assert(targetReg != REG_NA);
+ assert(node->gtGetOp2() == nullptr);
+ assert(!node->OperIsCommutative());
+
+ if (op1->isContained() || op1->isUsedFromSpillTemp())
+ {
+ assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId));
+
+#if DEBUG
+ bool supportsRegOptional = false;
+ bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional);
+ assert(isContainable || (supportsRegOptional && op1->IsRegOptional()));
+#endif // DEBUG
+
+ TempDsc* tmpDsc = nullptr;
+ unsigned varNum = BAD_VAR_NUM;
+ unsigned offset = (unsigned)-1;
+
+ if (op1->isUsedFromSpillTemp())
+ {
+ assert(op1->IsRegOptional());
+
+ tmpDsc = getSpillTempDsc(op1);
+ varNum = tmpDsc->tdTempNum();
+ offset = 0;
+
+ compiler->tmpRlsTemp(tmpDsc);
+ }
+ else if (op1->OperIsHWIntrinsic())
+ {
+ emit->emitIns_R_AR_I(ins, simdSize, targetReg, op1->gtGetOp1()->gtRegNum, 0, ival);
+ return;
+ }
+ else if (op1->isIndir())
+ {
+ GenTreeIndir* memIndir = op1->AsIndir();
+ GenTree* memBase = memIndir->gtOp1;
+
+ switch (memBase->OperGet())
+ {
+ case GT_LCL_VAR_ADDR:
+ {
+ varNum = memBase->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+
+ // Ensure that all the GenTreeIndir values are set to their defaults.
+ assert(!memIndir->HasIndex());
+ assert(memIndir->Scale() == 1);
+ assert(memIndir->Offset() == 0);
+
+ break;
+ }
+
+ case GT_CLS_VAR_ADDR:
+ {
+ emit->emitIns_R_C_I(ins, simdSize, targetReg, memBase->gtClsVar.gtClsVarHnd, 0, ival);
+ return;
+ }
+
+ default:
+ {
+ emit->emitIns_R_A_I(ins, simdSize, targetReg, memIndir, ival);
+ return;
+ }
+ }
+ }
+ else
+ {
+ switch (op1->OperGet())
+ {
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclField = op1->AsLclFld();
+
+ varNum = lclField->GetLclNum();
+ offset = lclField->gtLclFld.gtLclOffs;
+ break;
+ }
+
+ case GT_LCL_VAR:
+ {
+ assert(op1->IsRegOptional() || !compiler->lvaTable[op1->gtLclVar.gtLclNum].lvIsRegCandidate());
+ varNum = op1->AsLclVar()->GetLclNum();
+ offset = 0;
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ break;
+ }
+ }
+ }
+
+ // Ensure we got a good varNum and offset.
+ // We also need to check for `tmpDsc != nullptr` since spill temp numbers
+ // are negative and start with -1, which also happens to be BAD_VAR_NUM.
+ assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
+ assert(offset != (unsigned)-1);
+
+ emit->emitIns_R_S_I(ins, simdSize, targetReg, varNum, offset, ival);
+ }
+ else
+ {
+ regNumber op1Reg = op1->gtRegNum;
+ emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
+ }
+}
+
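
For the immediate-carrying variant just defined, ival comes from the intrinsic table; for the SSE4.1 rounding scalars it is the ROUNDSS control byte. A sketch of the two emission shapes (the helper is invented; the immediate values follow the x86 ROUNDSS encoding):

#include <cstdio>

// Invented helper: shows the contained and non-contained forms of an
// immediate-carrying unary intrinsic such as Sse41.FloorScalar.
void emitRoundScalar(bool op1InMemory, int roundingControl)
{
    if (op1InMemory)
    {
        printf("roundss xmm0, [rax], %d\n", roundingControl); // load folded in
    }
    else
    {
        printf("roundss xmm0, xmm1, %d\n", roundingControl);
    }
}

int main()
{
    emitRoundScalar(false, 9);  // 9  = round toward negative infinity (floor)
    emitRoundScalar(true, 10);  // 10 = round toward positive infinity (ceiling)
    return 0;
}
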
+//------------------------------------------------------------------------
// genHWIntrinsic_R_R_RM: Generates the code for a hardware intrinsic node that takes a register operand, a
// register/memory operand, and returns a value in a register
//
@@ -1130,7 +1388,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
assert(op1 != nullptr);
assert(op2 == nullptr);
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType));
break;
}
@@ -1152,16 +1410,17 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
case NI_SSE2_ConvertToUInt64:
{
assert(op2 == nullptr);
- assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
- baseType == TYP_LONG || baseType == TYP_ULONG);
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
+
+ if (varTypeIsIntegral(baseType))
{
- emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
+ assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
+ emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
}
else
{
- emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+ assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT);
+ genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType));
}
break;
}
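
The reworked convert handling in genSSE2Intrinsic splits on baseType because the instruction shape differs: floating baseTypes use "cvt* reg, xmm/mem" forms that the new genHWIntrinsic_R_RM path can fold a load into, while integral baseTypes are "movd/movq reg, xmm" style moves whose xmm source must stay in a register (which is also why that emitIns_R_R call passes op1Reg first). A simplified, hypothetical sketch:

#include <cstdio>

// Invented helper modeling the two shapes of Sse2.ConvertToInt32.
void emitConvertToInt32(bool floatingBase, bool op1InMemory)
{
    if (floatingBase)
    {
        // "cvtsd2si reg, xmm/mem": the memory form permits containment.
        printf(op1InMemory ? "cvtsd2si eax, [rax]\n" : "cvtsd2si eax, xmm0\n");
    }
    else
    {
        // "movd reg, xmm": the xmm source cannot be contained.
        printf("movd eax, xmm0\n");
    }
}

int main()
{
    emitConvertToInt32(true, true);
    emitConvertToInt32(false, false);
    return 0;
}
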
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 080d70d5b0..b9054b96cd 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2308,7 +2308,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
//
bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node, bool* supportsRegOptional)
{
- NamedIntrinsic containingintrinsicId = containingNode->gtHWintrinsicId;
+ NamedIntrinsic containingintrinsicId = containingNode->gtHWIntrinsicId;
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(containingintrinsicId);
// We shouldn't have called in here if containingNode doesn't support containment
@@ -2461,7 +2461,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
// TODO-XArch: Update this to be table driven, if possible.
- NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->gtHWintrinsicId;
+ NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->gtHWIntrinsicId;
switch (intrinsicId)
{
@@ -2502,7 +2502,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
//
void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
- NamedIntrinsic intrinsicId = node->gtHWintrinsicId;
+ NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId);
int numArgs = HWIntrinsicInfo::lookupNumArgs(node);
var_types baseType = node->gtSIMDBaseType;
@@ -2531,6 +2531,66 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
switch (category)
{
+ case HW_Category_SimpleSIMD:
+ case HW_Category_SIMDScalar:
+ {
+ switch (intrinsicId)
+ {
+ case NI_SSE_ReciprocalScalar:
+ case NI_SSE_ReciprocalSqrtScalar:
+ case NI_SSE_SqrtScalar:
+ case NI_SSE2_SqrtScalar:
+ case NI_SSE41_CeilingScalar:
+ case NI_SSE41_FloorScalar:
+ case NI_SSE41_RoundCurrentDirectionScalar:
+ case NI_SSE41_RoundToNearestIntegerScalar:
+ case NI_SSE41_RoundToNegativeInfinityScalar:
+ case NI_SSE41_RoundToPositiveInfinityScalar:
+ case NI_SSE41_RoundToZeroScalar:
+ {
+ // These intrinsics have both 1-operand and 2-operand overloads.
+ //
+ // The 1-operand overload basically does `intrinsic(op1, op1)`
+ //
+ // Because of this, the operand must be loaded into a register
+ // and cannot be contained.
+ return;
+ }
+
+ case NI_SSE2_ConvertToInt32:
+ case NI_SSE2_ConvertToInt64:
+ case NI_SSE2_ConvertToUInt32:
+ case NI_SSE2_ConvertToUInt64:
+ {
+ if (varTypeIsIntegral(baseType))
+ {
+ // These intrinsics are "ins reg/mem, xmm" and don't
+ // currently support containment.
+ return;
+ }
+
+ break;
+ }
+
+ default:
+ {
+ break;
+ }
+ }
+
+ bool supportsRegOptional = false;
+
+ if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional))
+ {
+ MakeSrcContained(node, op1);
+ }
+ else if (supportsRegOptional)
+ {
+ op1->SetRegOptional();
+ }
+ break;
+ }
+
default:
{
// TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are
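
The contain-or-reg-optional pattern closing the new case above is the standard lowering idiom: contain the operand when legal, otherwise mark it reg-optional so the register allocator may still keep it in memory when that is cheaper. A compilable miniature with stand-in names (the real IsContainableHWIntrinsicOp / MakeSrcContained / SetRegOptional are only modeled here):

#include <cstdio>

struct Node
{
    bool inMemory;
    bool contained;
    bool regOptional;
};

// Stand-in for IsContainableHWIntrinsicOp: reports whether the operand can be
// contained outright and whether a reg-optional fallback is acceptable.
bool isContainable(const Node& op, bool* supportsRegOptional)
{
    *supportsRegOptional = true; // most unary SIMD ops tolerate a memory operand
    return op.inMemory;
}

void containCheckUnary(Node& op)
{
    bool supportsRegOptional = false;
    if (isContainable(op, &supportsRegOptional))
    {
        op.contained = true;   // fold the load into the intrinsic
    }
    else if (supportsRegOptional)
    {
        op.regOptional = true; // let the register allocator decide
    }
}

int main()
{
    Node fromMemory{true, false, false};
    containCheckUnary(fromMemory);
    printf("contained=%d regOptional=%d\n", fromMemory.contained, fromMemory.regOptional);
    return 0;
}
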