author    Tanner Gooding <tagoo@outlook.com>  2018-01-29 21:20:08 -0800
committer Tanner Gooding <tagoo@outlook.com>  2018-02-03 07:48:26 -0800
commit    41f8b17ae1741dbcb15eaf0e7456c82548672889 (patch)
tree      de39985a7714be82475ccd96df97592d03e4e5e0 /src
parent    67feb5e680e4e94756e0133744da2c7150565666 (diff)
Updating the HWIntrinsic codegen to support marking LoadVector128 and LoadAlignedVector128 as contained.
Diffstat (limited to 'src')
-rw-r--r--  src/jit/codegenlinear.cpp            |   4
-rw-r--r--  src/jit/emitxarch.cpp                |  63
-rw-r--r--  src/jit/emitxarch.h                  |   5
-rw-r--r--  src/jit/hwintrinsiccodegenxarch.cpp  |  29
-rw-r--r--  src/jit/hwintrinsiclistxarch.h       |   6
-rw-r--r--  src/jit/hwintrinsicxarch.cpp         |   8
-rw-r--r--  src/jit/lower.h                      |  15
-rw-r--r--  src/jit/lowerxarch.cpp               | 105
-rw-r--r--  src/jit/lsrabuild.cpp                |   5
-rw-r--r--  src/jit/namedintrinsiclist.h         |   3
-rw-r--r--  src/jit/rationalize.cpp              |   2
11 files changed, 216 insertions(+), 29 deletions(-)
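
In practice, "contained" means the load intrinsic no longer produces a value into a register of its own; its consumer folds the memory access directly into its own encoding. A minimal before/after sketch of the intended effect (the C# call and register choices are illustrative, not taken from this diff):

    // C# (illustrative): Sse.Add(left, Sse.LoadVector128(ptr))
    //
    // Before: the load is emitted as a separate instruction.
    //   movups xmm1, xmmword ptr [rax]
    //   addps  xmm0, xmm1
    //
    // After, when the VEX encoding is available (unaligned memory operands are legal):
    //   vaddps xmm0, xmm0, xmmword ptr [rax]
    //
    // Without VEX, only LoadAlignedVector128 may be folded, since legacy SSE
    // encodings fault on unaligned memory operands:
    //   addps  xmm0, xmmword ptr [rax]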
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index fb8b6b5fe0..eddec19c1f 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -1297,6 +1297,10 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeReg(tree->gtGetOp1());
}
+ else if (tree->OperIsHWIntrinsic())
+ {
+ genConsumeReg(tree->gtGetOp1());
+ }
else
{
#ifdef FEATURE_SIMD
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 99dbce19a8..e697b7bd57 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4009,6 +4009,28 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
emitCurIGsize += sz;
}
+void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival)
+{
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
+ assert(IsSSEOrAVXInstruction(ins));
+
+ instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
+
+ id->idIns(ins);
+ id->idReg1(reg1);
+
+ id->idInsFmt(IF_RRW_ARD_CNS);
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ // Plus one for the 1-byte immediate (ival)
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
void emitter::emitIns_R_C_I(
instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
{
@@ -4202,6 +4224,30 @@ void emitter::emitIns_R_R_A_I(
dispIns(id);
emitCurIGsize += sz;
}
+
+void emitter::emitIns_R_R_AR_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival)
+{
+ assert(IsSSEOrAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+
+ instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);
+
+ id->idIns(ins);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ id->idInsFmt(IF_RWR_RRD_ARD_CNS);
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ // Plus one for the 1-byte immediate (ival)
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
#endif // !LEGACY_BACKEND
void emitter::emitIns_R_R_C_I(
@@ -5396,6 +5442,23 @@ void emitter::emitIns_SIMD_R_R_A_I(
}
}
+void emitter::emitIns_SIMD_R_R_AR_I(
+ instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival)
+{
+ if (UseVEXEncoding())
+ {
+ emitIns_R_R_AR_I(ins, attr, reg, reg1, base, 0, ival);
+ }
+ else
+ {
+ if (reg1 != reg)
+ {
+ emitIns_R_R(INS_movaps, attr, reg, reg1);
+ }
+ emitIns_R_AR_I(ins, attr, reg, base, 0, ival);
+ }
+}
+
void emitter::emitIns_SIMD_R_R_C_I(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
{
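
The non-VEX path of emitIns_SIMD_R_R_AR_I above first copies reg1 into the destination to preserve the two-operand RMW form. A hedged usage sketch (instruction and registers chosen purely for illustration):

    // Emits 'vshufps xmm0, xmm1, [rax], 0' under VEX; otherwise
    // 'movaps xmm0, xmm1' followed by 'shufps xmm0, [rax], 0'.
    emit->emitIns_SIMD_R_R_AR_I(INS_shufps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_RAX, 0);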
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index cca099cc72..8542767438 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -386,6 +386,8 @@ void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* i
void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival);
+void emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, regNumber base, int offs, int ival);
+
void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);
void emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival);
@@ -405,6 +407,8 @@ void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg
#ifndef LEGACY_BACKEND
void emitIns_R_R_A_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt);
+void emitIns_R_R_AR_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival);
#endif // !LEGACY_BACKEND
void emitIns_R_R_C_I(
@@ -475,6 +479,7 @@ void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
#ifdef FEATURE_HW_INTRINSICS
void emitIns_SIMD_R_R_AR(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base);
void emitIns_SIMD_R_R_A_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival);
+void emitIns_SIMD_R_R_AR_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber base, int ival);
void emitIns_SIMD_R_R_C_I(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);
void emitIns_SIMD_R_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int ival);
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index e3e703e83d..1aea1f1605 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -215,6 +215,9 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
if (op2->isContained() || op2->isUsedFromSpillTemp())
{
+ assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
+ assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
+
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
unsigned offset = (unsigned)-1;
@@ -229,6 +232,11 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
compiler->tmpRlsTemp(tmpDsc);
}
+ else if (op2->OperIsHWIntrinsic())
+ {
+ emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
+ return;
+ }
else if (op2->isIndir())
{
GenTreeIndir* memIndir = op2->AsIndir();
@@ -242,7 +250,6 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
offset = 0;
// Ensure that all the GenTreeIndir values are set to their defaults.
- assert(memBase->gtRegNum == REG_NA);
assert(!memIndir->HasIndex());
assert(memIndir->Scale() == 1);
assert(memIndir->Offset() == 0);
@@ -310,6 +317,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
regNumber targetReg = node->gtRegNum;
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
+ emitAttr simdSize = (emitAttr)(node->gtSIMDSize);
int ival = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
emitter* emit = getEmitter();
@@ -323,6 +331,9 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
if (op2->isContained() || op2->isUsedFromSpillTemp())
{
+ assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
+ assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
+
TempDsc* tmpDsc = nullptr;
unsigned varNum = BAD_VAR_NUM;
unsigned offset = (unsigned)-1;
@@ -337,6 +348,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
compiler->tmpRlsTemp(tmpDsc);
}
+ else if (op2->OperIsHWIntrinsic())
+ {
+ emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
+ return;
+ }
else if (op2->isIndir())
{
GenTreeIndir* memIndir = op2->AsIndir();
@@ -350,7 +366,6 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
offset = 0;
// Ensure that all the GenTreeIndir values are set to their defaults.
- assert(memBase->gtRegNum == REG_NA);
assert(!memIndir->HasIndex());
assert(memIndir->Scale() == 1);
assert(memIndir->Offset() == 0);
@@ -360,14 +375,14 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
case GT_CLS_VAR_ADDR:
{
- emit->emitIns_SIMD_R_R_C_I(ins, emitTypeSize(targetType), targetReg, op1Reg,
- memBase->gtClsVar.gtClsVarHnd, 0, ival);
+ emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
+ ival);
return;
}
default:
{
- emit->emitIns_SIMD_R_R_A_I(ins, emitTypeSize(targetType), targetReg, op1Reg, memIndir, ival);
+ emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
return;
}
}
@@ -405,11 +420,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
assert(offset != (unsigned)-1);
- emit->emitIns_SIMD_R_R_S_I(ins, emitTypeSize(targetType), targetReg, op1Reg, varNum, offset, ival);
+ emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
}
else
{
- emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(targetType), targetReg, op1Reg, op2->gtRegNum, ival);
+ emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, ival);
}
}
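
Both new OperIsHWIntrinsic branches assume the contained node is a load intrinsic whose address operand lives in a register. The tree shape they handle looks roughly like this (an illustrative sketch, not part of the diff):

    // HWIntrinsic (e.g. Add)                       -> targetReg
    //   op1: SIMD value                            -> op1Reg
    //   op2: HWIntrinsic LoadVector128, contained
    //     gtOp1: address                           -> op2->gtGetOp1()->gtRegNum
    //
    // so codegen folds the load into a reg, [base] form:
    emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);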
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 9237f1a177..18b0bc94bb 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -176,7 +176,7 @@ HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVaria
// SSE42 Intrinsics
HARDWARE_INTRINSIC(SSE42_IsSupported, "get_IsSupported", SSE42, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE42_Crc32, "Crc32", SSE42, -1, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE42_Crc32, "Crc32", SSE42, -1, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
// AVX Intrinsics
// TODO-XArch When implementing SetZeroVector256 add case to switch table in gentree.cpp
@@ -207,14 +207,14 @@ HARDWARE_INTRINSIC(FMA_IsSupported, "get_IsSupp
// LZCNT Intrinsics
HARDWARE_INTRINSIC(LZCNT_IsSupported, "get_IsSupported", LZCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount, "LeadingZeroCount", LZCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
// PCLMULQDQ Intrinsics
HARDWARE_INTRINSIC(PCLMULQDQ_IsSupported, "get_IsSupported", PCLMULQDQ, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
// POPCNT Intrinsics
HARDWARE_INTRINSIC(POPCNT_IsSupported, "get_IsSupported", POPCNT, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(POPCNT_PopCount, "PopCount", POPCNT, -1, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed)
#endif // FEATURE_HW_INTRINSIC
#undef HARDWARE_INTRINSIC
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 88b0eda793..5f16dd00be 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -420,6 +420,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
int numArgs = sig->numArgs;
var_types retType = JITtype2varType(sig->retType);
var_types baseType = TYP_UNKNOWN;
+
if (retType == TYP_STRUCT && featureSIMD)
{
unsigned int sizeBytes;
@@ -482,6 +483,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}
}
+ if ((flags & HW_Flag_NoFloatingPointUsed) == 0)
+ {
+ // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is used on SIMD
+ // fields but no SIMD local vars are in use. This is the same logic as is used for FEATURE_SIMD.
+ compFloatingPointUsed = true;
+ }
+
// table-driven importer of simple intrinsics
if (impIsTableDrivenHWIntrinsic(category, flags))
{
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 2f7d3bd9ef..0d298e0a52 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -319,11 +319,6 @@ private:
public:
static bool IndirsAreEquivalent(GenTree* pTreeA, GenTree* pTreeB);
-private:
- static bool NodesAreEquivalentLeaves(GenTree* candidate, GenTree* storeInd);
-
- bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
-
// return true if 'childNode' is an immediate that can be contained
// by the 'parentNode' (i.e. folded into an instruction)
// for example small enough and non-relocatable
@@ -335,6 +330,16 @@ private:
return m_lsra->isContainableMemoryOp(node);
}
+#ifdef FEATURE_HW_INTRINSICS
+ // Return true if 'node' is a containable HWIntrinsic op.
+ bool IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node);
+#endif // FEATURE_HW_INTRINSICS
+
+private:
+ static bool NodesAreEquivalentLeaves(GenTree* candidate, GenTree* storeInd);
+
+ bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
+
// Makes 'childNode' contained in the 'parentNode'
void MakeSrcContained(GenTree* parentNode, GenTree* childNode);
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index d212d86163..559d0c7960 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2297,6 +2297,65 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
#ifdef FEATURE_HW_INTRINSICS
//----------------------------------------------------------------------------------------------
+// IsContainableHWIntrinsicOp: Return true if 'node' is a containable HWIntrinsic op.
+//
+// Arguments:
+// containingNode - The hardware intrinsic node which contains 'node'
+// node - The node to check
+//
+// Return Value:
+// true if 'node' is a containable hardware intrinsic node; otherwise, false.
+//
+bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node)
+{
+ if (!node->OperIsHWIntrinsic())
+ {
+ // non-HWIntrinsic nodes are assumed to be unaligned loads, which are only
+ // supported by the VEX encoding.
+ return comp->canUseVexEncoding() && IsContainableMemoryOp(node);
+ }
+
+ bool isContainable = false;
+
+ // TODO-XArch: Update this to be table driven, if possible.
+
+ NamedIntrinsic containingIntrinsicID = containingNode->gtHWIntrinsicId;
+ HWIntrinsicCategory containingCategory = Compiler::categoryOfHWIntrinsic(containingIntrinsicID);
+ NamedIntrinsic intrinsicID = node->AsHWIntrinsic()->gtHWIntrinsicId;
+
+ switch (intrinsicID)
+ {
+ // Non-VEX encoded instructions require aligned memory ops, so we can fold them.
+ // However, we cannot do the same for the VEX-encoding as it changes an observable
+ // side-effect and may mask an Access Violation that would otherwise occur.
+ case NI_SSE_LoadAlignedVector128:
+ isContainable = (containingCategory == HW_Category_SimpleSIMD) && !comp->canUseVexEncoding();
+ break;
+
+ // Only fold a scalar load into a SIMD scalar intrinsic to ensure the number of bits
+ // read remains the same. Likewise, we can't fold a larger load into a SIMD scalar
+ // intrinsic as that would read fewer bits than requested.
+ case NI_SSE_LoadScalarVector128:
+ isContainable = (containingCategory == HW_Category_SIMDScalar);
+ break;
+
+ // VEX encoding supports unaligned memory ops, so we can fold them
+ case NI_SSE_LoadVector128:
+ isContainable = (containingCategory == HW_Category_SimpleSIMD) && comp->canUseVexEncoding();
+ break;
+
+ default:
+ return false;
+ }
+
+ // For containable nodes, the base type of the original node and the base type of the contained node
+ // should be the same. This helps ensure we aren't reading too many or too few bits.
+ assert(!isContainable || (containingNode->gtSIMDBaseType == node->AsHWIntrinsic()->gtSIMDBaseType));
+
+ return isContainable;
+}
+
+//----------------------------------------------------------------------------------------------
// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
//
// Arguments:
@@ -2311,25 +2370,45 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();
+ if ((flags & HW_Flag_NoContainment) != 0)
+ {
+ // Exit early if containment isn't supported
+ return;
+ }
+
// TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
- // TODO-XArch-CQ: Non-VEX encoded instructions require memory ops to be aligned
- if (comp->canUseVexEncoding() && numArgs == 2 && (flags & HW_Flag_NoContainment) == 0 &&
- category == HW_Category_SimpleSIMD)
+ if (numArgs == 2)
{
- if (IsContainableMemoryOp(op2))
+ switch (category)
{
- MakeSrcContained(node, op2);
- }
- else
- {
- // TODO-XArch-CQ: Commutative operations can have op1 be contained
- op2->SetRegOptional();
+ case HW_Category_SimpleSIMD:
+ case HW_Category_SIMDScalar:
+ if (IsContainableHWIntrinsicOp(node, op2))
+ {
+ MakeSrcContained(node, op2);
+ }
+ else if (((flags & HW_Flag_Commutative) != 0) && IsContainableHWIntrinsicOp(node, op1))
+ {
+ MakeSrcContained(node, op1);
+
+ // Swap the operands here to make the containment checks in codegen significantly simpler
+ node->gtOp1 = op2;
+ node->gtOp2 = op1;
+ }
+ else if (comp->canUseVexEncoding())
+ {
+ // We can only mark as reg optional when using the VEX encoding
+ // since that supports unaligned mem operands and non-VEX doesn't
+ op2->SetRegOptional();
+ }
+ break;
+
+ default:
+ break;
}
}
-
- // TODO - change to all IMM intrinsics
- if (intrinsicID == NI_SSE_Shuffle)
+ else if (intrinsicID == NI_SSE_Shuffle) // TODO - change to all IMM intrinsics
{
assert(op1->OperIsList());
GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();
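
Summarizing the switch in IsContainableHWIntrinsicOp, the containment decision for the three load intrinsics comes down to (assuming the base types match):

    // NI_SSE_LoadAlignedVector128 -> contained in SimpleSIMD nodes, non-VEX only
    //                                (folding keeps the alignment fault observable)
    // NI_SSE_LoadScalarVector128  -> contained in SIMDScalar nodes, either encoding
    //                                (load width matches what the instruction reads)
    // NI_SSE_LoadVector128        -> contained in SimpleSIMD nodes, VEX only
    //                                (legacy encodings fault on unaligned operands)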
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
index 0d3cccb6b3..f3f019d874 100644
--- a/src/jit/lsrabuild.cpp
+++ b/src/jit/lsrabuild.cpp
@@ -2614,6 +2614,11 @@ int LinearScan::GetOperandInfo(GenTree* node)
const unsigned srcCount = GetIndirInfo(node->AsIndir());
return srcCount;
}
+ if (node->OperIsHWIntrinsic())
+ {
+ appendLocationInfoToList(node->gtGetOp1());
+ return 1;
+ }
return 0;
}
diff --git a/src/jit/namedintrinsiclist.h b/src/jit/namedintrinsiclist.h
index a6edd867b1..3bc85976d1 100644
--- a/src/jit/namedintrinsiclist.h
+++ b/src/jit/namedintrinsiclist.h
@@ -77,6 +77,9 @@ enum HWIntrinsicFlag : unsigned int
// Select base type using argument type
HW_Flag_BaseTypeFromArg = 0x400,
+
+ // Indicates compFloatingPointUsed does not need to be set.
+ HW_Flag_NoFloatingPointUsed = 0x800
};
inline HWIntrinsicFlag operator|(HWIntrinsicFlag c1, HWIntrinsicFlag c2)
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 3696f40f7c..b40abd6791 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -848,7 +848,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
{
GenTree* dataSrc = parent->gtGetOp2();
- if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD))
+ if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD) && (!dataSrc->OperIsHWIntrinsic()))
{
noway_assert(dataSrc->OperIsIndir());
keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());