summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Fei Peng <fei.peng@intel.com> 2017-10-24 14:35:18 -0700
committer Fei Peng <fei.peng@intel.com> 2017-10-25 15:10:06 -0700
commit 9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e (patch)
tree de768274d3ec8f1d9ce05491c987fd26de6a656e /src
parent 15e1472aab7ed42dafde0331ed6cece465a0763d (diff)
download coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.gz
coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.bz2
coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.zip
Decoupling SIMD levels from instruction sets
Diffstat (limited to 'src')
-rw-r--r-- src/jit/codegencommon.cpp 2
-rw-r--r-- src/jit/codegenxarch.cpp 2
-rw-r--r-- src/jit/compiler.h 20
-rw-r--r-- src/jit/instr.h 16
-rw-r--r-- src/jit/lowerxarch.cpp 4
-rw-r--r-- src/jit/lsraxarch.cpp 24
-rw-r--r-- src/jit/simd.cpp 17
-rw-r--r-- src/jit/simd.h 37
-rw-r--r-- src/jit/simdcodegenxarch.cpp 128
9 files changed, 140 insertions, 110 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 64f38ae080..18b42592e8 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -11143,7 +11143,7 @@ void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
if (emitVzeroUpper)
{
- assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
instGen(INS_vzeroupper);
}
}
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index f1730b9999..778bcfa309 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -5360,7 +5360,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
// when there's preceding 256-bit AVX to legacy SSE transition penalty.
if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
{
- assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
instGen(INS_vzeroupper);
}
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 6a64a7b648..629129baef 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -7352,39 +7352,39 @@ private:
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
- // Get highest available instruction set for floating point codegen
- InstructionSet getFloatingPointInstructionSet()
+ // Get highest available level for floating point codegen
+ SIMDLevel getFloatingPointCodegenLevel()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
if (canUseAVX())
{
- return InstructionSet_AVX;
+ return SIMD_AVX2_Supported;
}
if (CanUseSSE3_4())
{
- return InstructionSet_SSE3_4;
+ return SIMD_SSE4_Supported;
}
// min bar is SSE2
assert(canUseSSE2());
- return InstructionSet_SSE2;
+ return SIMD_SSE2_Supported;
#else
assert(!"getFPInstructionSet() is not implemented for target arch");
unreached();
- return InstructionSet_NONE;
+ return SIMD_Not_Supported;
#endif
}
- // Get highest available instruction set for SIMD codegen
- InstructionSet getSIMDInstructionSet()
+ // Get highest available level for SIMD codegen
+ SIMDLevel getSIMDSupportLevel()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
- return getFloatingPointInstructionSet();
+ return getFloatingPointCodegenLevel();
#else
assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
unreached();
- return InstructionSet_NONE;
+ return SIMD_Not_Supported;
#endif
}
diff --git a/src/jit/instr.h b/src/jit/instr.h
index eac425634c..ec58f61458 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -277,23 +277,19 @@ enum emitAttr : unsigned
enum InstructionSet
{
#ifdef _TARGET_XARCH_
- // Linear order start
InstructionSet_ILLEGAL = 0,
+ // Start of the linearly ordered SIMD instruction sets.
+ // These ISAs are strictly ordered from one generation to the next.
InstructionSet_SSE = 1,
InstructionSet_SSE2 = 2,
InstructionSet_SSE3 = 3,
InstructionSet_SSSE3 = 4,
InstructionSet_SSE41 = 5,
InstructionSet_SSE42 = 6,
- InstructionSet_SSE3_4 = 7, // SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set
- InstructionSet_AVX = 8,
- InstructionSet_AVX2 = 9,
- // Linear order end
- // TODO - Instruction sets have the linear order only in above area.
- // We should no long compare the return value of getSIMDInstructionSet()
- // or getFloatingPointInstructionSet() to the InstructionSet values.
- // Should refactor SIMD code only to be aware of SIMD feature levels
- // (SSE2, SSE3_4, AVX, and AVX2, etc.) rather than concrete ISA.
+ InstructionSet_AVX = 7,
+ InstructionSet_AVX2 = 8,
+ // Reserve values <32 for future SIMD instruction sets (e.g., AVX512).
+ // End of the linearly ordered SIMD instruction sets.
InstructionSet_AES = 32,
InstructionSet_BMI1 = 33,
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index f509475fe1..65a1ef3661 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2207,7 +2207,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
{
MakeSrcContained(simdNode, op1);
}
- else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
+ else if ((comp->getSIMDSupportLevel() == SIMD_AVX2_Supported) &&
((simdNode->gtSIMDSize == 16) || (simdNode->gtSIMDSize == 32)))
{
// Either op1 is a float or dbl constant or an addr
@@ -2231,7 +2231,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
// for integral vectors but not for floating-point for the reason
// that we have +0.0 and -0.0 and +0.0 == -0.0
op2 = simdNode->gtGetOp2();
- if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
+ if ((comp->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0))
{
MakeSrcContained(simdNode, op2);
}
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 53aebe00bf..9b806dcd99 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -2108,7 +2108,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
// Must be a Vector<int> or Vector<short> Vector<sbyte>
assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
simdTree->gtSIMDBaseType == TYP_BYTE);
- assert(compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
info->srcCount = 1;
break;
@@ -2131,7 +2131,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
// SSE2 32-bit integer multiplication requires two temp regs
if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
- compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
info->internalFloatCount = 2;
info->setInternalCandidates(this, allSIMDRegs());
@@ -2198,7 +2198,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
// and the need for scratch registers.
if (varTypeIsFloating(simdTree->gtSIMDBaseType))
{
- if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) ||
(simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
{
info->internalFloatCount = 1;
@@ -2209,8 +2209,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
}
else
{
- assert(simdTree->gtSIMDBaseType == TYP_INT &&
- compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
// No need to set isInternalRegDelayFree since targetReg is a
// an int type reg and guaranteed to be different from xmm/ymm
@@ -2268,7 +2267,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
{
bool needFloatTemp;
if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
- (compiler->getSIMDInstructionSet() == InstructionSet_AVX))
+ (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported))
{
int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
needFloatTemp = (byteShiftCnt >= 16);
@@ -2295,7 +2294,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
info->srcCount = 2;
// We need an internal integer register for SSE2 codegen
- if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
info->internalIntCount = 1;
info->setInternalCandidates(this, allRegs(TYP_INT));
@@ -2342,7 +2341,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
info->isInternalRegDelayFree = true;
info->srcCount = 1;
info->internalIntCount = 1;
- if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
info->internalFloatCount = 2;
}
@@ -2365,8 +2364,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
}
else
#endif
- if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) ||
- (simdTree->gtSIMDBaseType == TYP_ULONG))
+ if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) || (simdTree->gtSIMDBaseType == TYP_ULONG))
{
info->internalFloatCount = 2;
}
@@ -2381,7 +2379,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
// We need an internal register different from targetReg.
info->isInternalRegDelayFree = true;
info->srcCount = 2;
- if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
+ if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
{
info->internalFloatCount = 2;
}
@@ -2767,11 +2765,11 @@ void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsi
{
if (isFloatingPointType)
{
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ if (compiler->getFloatingPointCodegenLevel() == SIMD_AVX2_Supported)
{
compiler->getEmitter()->SetContainsAVX(true);
}
- if (sizeOfSIMDVector == 32 && compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (sizeOfSIMDVector == 32 && compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
compiler->getEmitter()->SetContains256bitAVX(true);
}
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index d190cd29f4..b85d97c1ac 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -940,7 +940,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
// This routine should be used only for integer base type vectors
assert(varTypeIsIntegral(baseType));
- if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
+ if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
{
return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
}
@@ -1038,7 +1038,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
: SIMDIntrinsicGreaterThanOrEqual;
}
- if ((getSIMDInstructionSet() == InstructionSet_SSE2) && baseType == TYP_LONG)
+ if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG)
{
// There is no direct SSE2 support for comparing TYP_LONG vectors.
// These have to be implemented interms of TYP_INT vector comparison operations.
@@ -1187,7 +1187,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
// result = ConditionalSelect(BitVector, vector.Zero - v, v)
bool useConditionalSelect = false;
- if (getSIMDInstructionSet() == InstructionSet_SSE2)
+ if (getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
// SSE2 doesn't support abs on signed integer type vectors.
if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE)
@@ -1197,7 +1197,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
}
else
{
- assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported);
if (baseType == TYP_LONG)
{
// SSE3_4/AVX2 don't support abs on long type vector.
@@ -1300,7 +1300,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
}
else
{
- assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported);
assert(baseType != TYP_LONG);
retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size);
@@ -1428,7 +1428,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
// result = result - 2^15 ; readjust it back
if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE ||
- (getSIMDInstructionSet() >= InstructionSet_SSE3_4 &&
+ (getSIMDSupportLevel() >= SIMD_SSE4_Supported &&
(baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_CHAR)))
{
// SSE2 or SSE4.1 has direct support
@@ -1436,7 +1436,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
}
else if (baseType == TYP_CHAR || baseType == TYP_BYTE)
{
- assert(getSIMDInstructionSet() == InstructionSet_SSE2);
+ assert(getSIMDSupportLevel() == SIMD_SSE2_Supported);
int constVal;
SIMDIntrinsicID operIntrinsic;
SIMDIntrinsicID adjustIntrinsic;
@@ -2568,8 +2568,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
#if defined(_TARGET_XARCH_)
// Right now dot product is supported only for float/double vectors and
// int vectors on SSE4/AVX.
- if (!varTypeIsFloating(baseType) &&
- !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+ if (!varTypeIsFloating(baseType) && !(baseType == TYP_INT && getSIMDSupportLevel() >= SIMD_SSE4_Supported))
{
return nullptr;
}
diff --git a/src/jit/simd.h b/src/jit/simd.h
index ff522fd52f..c1650482a5 100644
--- a/src/jit/simd.h
+++ b/src/jit/simd.h
@@ -5,6 +5,43 @@
#ifndef _SIMD_H_
#define _SIMD_H_
+// Underlying hardware information.
+// This type is used to control:
+// 1. The length of System.Numerics.Vector<T>.
+// 2. Codegen of System.Numerics.Vectors.
+// 3. Codegen of floating-point arithmetic (VEX-encoded or not).
+//
+// Notes:
+// - Hardware SIMD support is classified into levels. Do not use
+// InstructionSet (instr.h) directly for System.Numerics.Vectors.
+// - Values of SIMDLevel are strictly increasing, so that each SIMD level
+// is a superset of the previous levels.
+enum SIMDLevel
+{
+ SIMD_Not_Supported = 0,
+#ifdef _TARGET_XARCH_
+ // SSE2 - The min bar of SIMD ISA on x86/x64.
+ // Vector<T> length is 128-bit.
+ // Floating-point instructions are legacy SSE encoded.
+ SIMD_SSE2_Supported = 1,
+
+ // SSE4 - RyuJIT may generate SSE3, SSSE3, SSE4.1 and SSE4.2 instructions for certain intrinsics.
+ // Vector<T> length is 128-bit.
+ // Floating-point instructions are legacy SSE encoded.
+ SIMD_SSE4_Supported = 2,
+
+ // TODO - AVX - Hardware supports AVX instruction set.
+ // TODO - Vector<T> length is 128-bit and SIMD instructions are VEX-128 encoded.
+ // TODO - Floating-point instructions are VEX-128 encoded.
+ SIMD_AVX_Supported = 3,
+
+ // AVX2 - Hardware has the AVX and AVX2 instruction sets.
+ // Vector<T> length is 256-bit and SIMD instructions are VEX-256 encoded.
+ // Floating-point instructions are VEX-128 encoded.
+ SIMD_AVX2_Supported = 4,
+#endif
+};
+
#ifdef FEATURE_SIMD
#ifdef DEBUG
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index d9ba77589d..4db83b6f8d 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -205,7 +205,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pmullw;
}
- else if ((baseType == TYP_INT) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+ else if ((baseType == TYP_INT) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
{
result = INS_pmulld;
}
@@ -243,7 +243,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pminsw;
}
- else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
{
if (baseType == TYP_BYTE)
{
@@ -285,7 +285,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pmaxsw;
}
- else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
{
if (baseType == TYP_BYTE)
{
@@ -311,7 +311,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
break;
case SIMDIntrinsicAbs:
- if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
{
if (baseType == TYP_INT)
{
@@ -354,7 +354,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
result = INS_pcmpeqb;
}
else if ((baseType == TYP_ULONG || baseType == TYP_LONG) &&
- (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+ (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
{
result = INS_pcmpeqq;
}
@@ -413,7 +413,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pcmpgtb;
}
- else if ((baseType == TYP_LONG) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+ else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
{
result = INS_pcmpgtq;
}
@@ -515,7 +515,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
case TYP_INT:
case TYP_UINT:
- if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
{
result = INS_packusdw;
}
@@ -671,7 +671,7 @@ void CodeGen::genSIMDScalarMove(
var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
{
assert(varTypeIsFloating(baseType));
- if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
switch (moveType)
{
@@ -784,9 +784,9 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
- unsigned size = simdNode->gtSIMDSize;
+ var_types targetType = simdNode->TypeGet();
+ SIMDLevel level = compiler->getSIMDSupportLevel();
+ unsigned size = simdNode->gtSIMDSize;
// Should never see small int base type vectors except for zero initialization.
noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
@@ -869,7 +869,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
}
else
{
- assert(iset == InstructionSet_AVX);
+ assert(level == SIMD_AVX2_Supported);
ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType);
if (op1->IsCnsFltOrDbl())
{
@@ -887,7 +887,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
}
}
}
- else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16)))
+ else if (level == SIMD_AVX2_Supported && ((size == 32) || (size == 16)))
{
regNumber srcReg = genConsumeReg(op1);
if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
@@ -1139,7 +1139,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode)
getEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000);
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
#else
- if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000);
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
@@ -1152,7 +1152,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode)
getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), tmpReg, tmpIntReg, 3);
}
#endif
- if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
inst_RV_RV(INS_vpbroadcastd, tmpReg, tmpReg, targetType, emitActualTypeSize(targetType));
}
@@ -1233,13 +1233,13 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types simdType = simdNode->TypeGet();
- regNumber op1Reg = genConsumeReg(op1);
- regNumber tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT);
- regNumber tmpReg;
- regNumber tmpReg2;
- regNumber tmpReg3;
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types simdType = simdNode->TypeGet();
+ regNumber op1Reg = genConsumeReg(op1);
+ regNumber tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT);
+ regNumber tmpReg;
+ regNumber tmpReg2;
+ regNumber tmpReg3;
+ SIMDLevel level = compiler->getSIMDSupportLevel();
#ifdef _TARGET_X86_
if (baseType == TYP_LONG)
@@ -1251,7 +1251,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
}
else
#endif
- if (iset == InstructionSet_AVX || (baseType == TYP_ULONG))
+ if (level == SIMD_AVX2_Supported || (baseType == TYP_ULONG))
{
tmpReg = simdNode->ExtractTempReg(RBM_ALLFLOAT);
tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
@@ -1307,7 +1307,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
#endif
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
}
@@ -1329,7 +1329,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
#endif
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
}
@@ -1351,7 +1351,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
instruction leftShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
// Extract the high 16-bits
getEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg, op1Reg, 0x01);
@@ -1389,7 +1389,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
inst_RV_RV(INS_movaps, targetReg, tmpReg, simdType, emitActualTypeSize(simdType));
}
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, targetReg, tmpReg2, 0x01);
}
@@ -1419,7 +1419,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
}
@@ -1437,7 +1437,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
}
@@ -1462,7 +1462,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
instruction leftShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
// Extract the high 16-bits
getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, op1Reg, 0x01);
@@ -1498,7 +1498,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
// Merge or copy the results (only at this point are we done with op1Reg).
assert(tmpReg != targetReg);
inst_RV_RV(INS_por, targetReg, tmpReg, simdType, emitActualTypeSize(simdType));
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
getEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, targetReg, tmpReg2, 0x01);
}
@@ -1520,7 +1520,7 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r
{
var_types simdType = simdNode->TypeGet();
emitAttr emitSize = emitActualTypeSize(simdType);
- if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
instruction extractIns = varTypeIsFloating(simdNode->gtSIMDBaseType) ? INS_vextractf128 : INS_vextracti128;
getEmitter()->emitIns_R_R_I(extractIns, EA_32BYTE, tgtReg, srcReg, 0x01);
@@ -1554,8 +1554,8 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types simdType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types simdType = simdNode->TypeGet();
+ SIMDLevel level = compiler->getSIMDSupportLevel();
genConsumeOperands(simdNode);
regNumber op1Reg = op1->gtRegNum;
@@ -1582,7 +1582,7 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
regNumber tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
assert(tmpReg != op1Reg);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
// permute op1Reg and put it into targetReg
unsigned ival = 0xd4;
@@ -1627,9 +1627,9 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types simdType = simdNode->TypeGet();
- emitAttr emitSize = emitTypeSize(simdType);
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types simdType = simdNode->TypeGet();
+ emitAttr emitSize = emitTypeSize(simdType);
+ SIMDLevel level = compiler->getSIMDSupportLevel();
genConsumeOperands(simdNode);
regNumber op1Reg = op1->gtRegNum;
@@ -1652,7 +1652,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
}
else if (varTypeIsLong(baseType))
{
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
// We have 8 long elements, 0-3 in op1Reg, 4-7 in op2Reg.
// We will generate the following:
@@ -1714,7 +1714,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
instruction shiftLeftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
instruction shiftRightIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
regNumber tmpReg = simdNode->ExtractTempReg(RBM_ALLFLOAT);
regNumber tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
@@ -1738,7 +1738,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
inst_RV_RV(ins_Copy(simdType), tmpReg, op2Reg, simdType, emitSize);
instruction tmpShiftRight = shiftRightIns;
- if ((baseType == TYP_INT || baseType == TYP_UINT) && iset == InstructionSet_SSE2)
+ if ((baseType == TYP_INT || baseType == TYP_UINT) && level == SIMD_SSE2_Supported)
{
tmpShiftRight = INS_psrad;
}
@@ -1778,8 +1778,8 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
var_types baseType = simdNode->gtSIMDBaseType;
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ var_types targetType = simdNode->TypeGet();
+ SIMDLevel level = compiler->getSIMDSupportLevel();
genConsumeOperands(simdNode);
regNumber op1Reg = op1->gtRegNum;
@@ -1790,7 +1790,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
// SSE2 doesn't have an instruction to perform this operation directly
// whereas SSE4.1 does (pmulld). This is special cased and computed
// as follows.
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2)
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && level == SIMD_SSE2_Supported)
{
// We need a temporary register that is NOT the same as the target,
// and we MAY need another.
@@ -1952,12 +1952,12 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
{
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
- regNumber targetReg = simdNode->gtRegNum;
- var_types targetType = simdNode->TypeGet();
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ var_types targetType = simdNode->TypeGet();
+ SIMDLevel level = compiler->getSIMDSupportLevel();
genConsumeOperands(simdNode);
regNumber op1Reg = op1->gtRegNum;
@@ -1976,7 +1976,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
// TYP_INT comparison operations
if (baseType == TYP_LONG || baseType == TYP_ULONG)
{
- assert(iset >= InstructionSet_SSE3_4);
+ assert(level >= SIMD_SSE4_Supported);
}
#endif
@@ -2063,7 +2063,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
// On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest.
if (op2->isContained())
{
- assert((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0));
+ assert((compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0));
inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType));
}
else
@@ -2175,7 +2175,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
regNumber tmpReg1 = REG_NA;
regNumber tmpReg2 = REG_NA;
- InstructionSet iset = compiler->getSIMDInstructionSet();
+ SIMDLevel level = compiler->getSIMDSupportLevel();
// Dot product intrinsic is supported only on float/double vectors
// and 32-byte int vectors on AVX.
@@ -2192,7 +2192,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
// different from targetReg as scratch.
if (varTypeIsFloating(baseType))
{
- if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+ if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) || (simdEvalType == TYP_SIMD32))
{
tmpReg1 = simdNode->GetSingleTempReg();
assert(tmpReg1 != targetReg);
@@ -2205,9 +2205,9 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
else
{
assert(baseType == TYP_INT);
- assert(iset >= InstructionSet_SSE3_4);
+ assert(level >= SIMD_SSE4_Supported);
- if (iset == InstructionSet_SSE3_4)
+ if (level == SIMD_SSE4_Supported)
{
tmpReg1 = simdNode->GetSingleTempReg();
}
@@ -2218,7 +2218,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
}
}
- if (iset == InstructionSet_SSE2)
+ if (level == SIMD_SSE2_Supported)
{
// We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
if (op1Reg == targetReg)
@@ -2294,7 +2294,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
}
else
{
- assert(iset >= InstructionSet_SSE3_4);
+ assert(level >= SIMD_SSE4_Supported);
if (varTypeIsFloating(baseType))
{
@@ -2349,7 +2349,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
{
// On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use
// dppd directly.
- assert(iset == InstructionSet_SSE3_4);
+ assert(level == SIMD_SSE4_Supported);
inst_RV_RV_IV(INS_dppd, emitSize, targetReg, op2Reg, 0x31);
}
}
@@ -2375,7 +2375,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
#endif
// tmpReg1 = op1 * op2
- if (iset == InstructionSet_AVX)
+ if (level == SIMD_AVX2_Supported)
{
// On AVX take advantage 3 operand form of pmulld
inst_RV_RV_RV(INS_pmulld, tmpReg1, op1Reg, op2Reg, emitTypeSize(simdEvalType));
@@ -2572,7 +2572,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
if (byteShiftCnt >= 16)
{
- assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
byteShiftCnt -= 16;
regNumber newSrcReg;
if (varTypeIsFloating(baseType))
@@ -2632,7 +2632,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
// low 3 bits of index, but it's better to use the right value.
if (index > 8)
{
- assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
index -= 8;
}
@@ -2764,7 +2764,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
// logic needs modification.
noway_assert(baseType == TYP_FLOAT);
- if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
{
// We need one additional int register as scratch
regNumber tmpReg = simdNode->GetSingleTempReg();
@@ -2804,7 +2804,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
{
assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2);
- noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2);
+ noway_assert(compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported);
GenTree* op1 = simdNode->gtGetOp1();
GenTree* op2 = simdNode->gtGetOp2();