Decoupling SIMD levels from instruction sets

author: Fei Peng <fei.peng@intel.com> 2017-10-24 14:35:18 -0700
committer: Fei Peng <fei.peng@intel.com> 2017-10-25 15:10:06 -0700
commit: 9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e (patch)
tree: de768274d3ec8f1d9ce05491c987fd26de6a656e /src
parent: 15e1472aab7ed42dafde0331ed6cece465a0763d (diff)
download: coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.gz
coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.bz2
coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.zip
9 files changed, 140 insertions, 110 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 64f38ae080..18b42592e8 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -11143,7 +11143,7 @@ void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
 
     if (emitVzeroUpper)
     {
-        assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+        assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
         instGen(INS_vzeroupper);
     }
 }
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index f1730b9999..778bcfa309 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -5360,7 +5360,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
     // when there's preceding 256-bit AVX to legacy SSE transition penalty.
     if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
     {
-        assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+        assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
         instGen(INS_vzeroupper);
     }
 
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 6a64a7b648..629129baef 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -7352,39 +7352,39 @@ private:
     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     */
 
-    // Get highest available instruction set for floating point codegen
-    InstructionSet getFloatingPointInstructionSet()
+    // Get highest available level for floating point codegen
+    SIMDLevel getFloatingPointCodegenLevel()
     {
 #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
         if (canUseAVX())
         {
-            return InstructionSet_AVX;
+            return SIMD_AVX2_Supported;
         }
 
         if (CanUseSSE3_4())
         {
-            return InstructionSet_SSE3_4;
+            return SIMD_SSE4_Supported;
         }
 
         // min bar is SSE2
         assert(canUseSSE2());
-        return InstructionSet_SSE2;
+        return SIMD_SSE2_Supported;
 #else
         assert(!"getFPInstructionSet() is not implemented for target arch");
         unreached();
-        return InstructionSet_NONE;
+        return SIMD_Not_Supported;
 #endif
     }
 
-    // Get highest available instruction set for SIMD codegen
-    InstructionSet getSIMDInstructionSet()
+    // Get highest available level for SIMD codegen
+    SIMDLevel getSIMDSupportLevel()
     {
 #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
-        return getFloatingPointInstructionSet();
+        return getFloatingPointCodegenLevel();
 #else
         assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
         unreached();
-        return InstructionSet_NONE;
+        return SIMD_Not_Supported;
 #endif
     }
 
diff --git a/src/jit/instr.h b/src/jit/instr.h
index eac425634c..ec58f61458 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -277,23 +277,19 @@ enum emitAttr : unsigned
 enum InstructionSet
 {
 #ifdef _TARGET_XARCH_
-    // Linear order start
     InstructionSet_ILLEGAL = 0,
+    // Start linear order SIMD instruction sets
+    // These ISAs are strictly ordered from generation to generation.
     InstructionSet_SSE     = 1,
     InstructionSet_SSE2    = 2,
     InstructionSet_SSE3    = 3,
     InstructionSet_SSSE3   = 4,
     InstructionSet_SSE41   = 5,
     InstructionSet_SSE42   = 6,
-    InstructionSet_SSE3_4  = 7,    // SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set
-    InstructionSet_AVX     = 8,
-    InstructionSet_AVX2    = 9,
-    // Linear order end
-    // TODO - Instruction sets have the linear order only in above area.
-    // We should no long compare the return value of getSIMDInstructionSet()
-    // or getFloatingPointInstructionSet() to the InstructionSet values.
-    // Should refactor SIMD code only to be aware of SIMD feature levels 
-    // (SSE2, SSE3_4, AVX, and AVX2, etc.) rather than concrete ISA.
+    InstructionSet_AVX     = 7,
+    InstructionSet_AVX2    = 8,
+    // Reserve values <32 for future SIMD instruction sets (e.g., AVX512).
+    // End linear order SIMD instruction sets.
     
     InstructionSet_AES     = 32,
     InstructionSet_BMI1    = 33,
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index f509475fe1..65a1ef3661 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -2207,7 +2207,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
             {
                 MakeSrcContained(simdNode, op1);
             }
-            else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
+            else if ((comp->getSIMDSupportLevel() == SIMD_AVX2_Supported) &&
                      ((simdNode->gtSIMDSize == 16) || (simdNode->gtSIMDSize == 32)))
             {
                 // Either op1 is a float or dbl constant or an addr
@@ -2231,7 +2231,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
             // for integral vectors but not for floating-point for the reason
             // that we have +0.0 and -0.0 and +0.0 == -0.0
             op2 = simdNode->gtGetOp2();
-            if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
+            if ((comp->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0))
             {
                 MakeSrcContained(simdNode, op2);
             }
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index 53aebe00bf..9b806dcd99 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -2108,7 +2108,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             // Must be a Vector<int> or Vector<short> Vector<sbyte>
             assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
                    simdTree->gtSIMDBaseType == TYP_BYTE);
-            assert(compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+            assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
             info->srcCount = 1;
             break;
 
@@ -2131,7 +2131,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 
             // SSE2 32-bit integer multiplication requires two temp regs
             if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
-                compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+                compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
             {
                 info->internalFloatCount = 2;
                 info->setInternalCandidates(this, allSIMDRegs());
@@ -2198,7 +2198,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             // and the need for scratch registers.
             if (varTypeIsFloating(simdTree->gtSIMDBaseType))
             {
-                if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+                if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) ||
                     (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
                 {
                     info->internalFloatCount     = 1;
@@ -2209,8 +2209,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             }
             else
             {
-                assert(simdTree->gtSIMDBaseType == TYP_INT &&
-                       compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+                assert(simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
 
                 // No need to set isInternalRegDelayFree since targetReg is a
                 // an int type reg and guaranteed to be different from xmm/ymm
@@ -2268,7 +2267,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                 {
                     bool needFloatTemp;
                     if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
-                        (compiler->getSIMDInstructionSet() == InstructionSet_AVX))
+                        (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported))
                     {
                         int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
                         needFloatTemp    = (byteShiftCnt >= 16);
@@ -2295,7 +2294,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             info->srcCount = 2;
 
             // We need an internal integer register for SSE2 codegen
-            if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+            if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
             {
                 info->internalIntCount = 1;
                 info->setInternalCandidates(this, allRegs(TYP_INT));
@@ -2342,7 +2341,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             info->isInternalRegDelayFree = true;
             info->srcCount               = 1;
             info->internalIntCount       = 1;
-            if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+            if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
             {
                 info->internalFloatCount = 2;
             }
@@ -2365,8 +2364,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             }
             else
 #endif
-                if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) ||
-                    (simdTree->gtSIMDBaseType == TYP_ULONG))
+                if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) || (simdTree->gtSIMDBaseType == TYP_ULONG))
             {
                 info->internalFloatCount = 2;
             }
@@ -2381,7 +2379,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
             // We need an internal register different from targetReg.
             info->isInternalRegDelayFree = true;
             info->srcCount               = 2;
-            if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
+            if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
             {
                 info->internalFloatCount = 2;
             }
@@ -2767,11 +2765,11 @@ void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsi
 {
     if (isFloatingPointType)
     {
-        if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+        if (compiler->getFloatingPointCodegenLevel() == SIMD_AVX2_Supported)
         {
             compiler->getEmitter()->SetContainsAVX(true);
         }
-        if (sizeOfSIMDVector == 32 && compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+        if (sizeOfSIMDVector == 32 && compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             compiler->getEmitter()->SetContains256bitAVX(true);
         }
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index d190cd29f4..b85d97c1ac 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -940,7 +940,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
 
     // This routine should be used only for integer base type vectors
     assert(varTypeIsIntegral(baseType));
-    if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
+    if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
     {
         return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
     }
@@ -1038,7 +1038,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID      relOpIntrinsicId,
                                                                       : SIMDIntrinsicGreaterThanOrEqual;
         }
 
-        if ((getSIMDInstructionSet() == InstructionSet_SSE2) && baseType == TYP_LONG)
+        if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG)
         {
             // There is no direct SSE2 support for comparing TYP_LONG vectors.
             // These have to be implemented interms of TYP_INT vector comparison operations.
@@ -1187,7 +1187,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
     //     result = ConditionalSelect(BitVector, vector.Zero - v, v)
 
     bool useConditionalSelect = false;
-    if (getSIMDInstructionSet() == InstructionSet_SSE2)
+    if (getSIMDSupportLevel() == SIMD_SSE2_Supported)
     {
         // SSE2 doesn't support abs on signed integer type vectors.
         if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE)
@@ -1197,7 +1197,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
     }
     else
     {
-        assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+        assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported);
         if (baseType == TYP_LONG)
         {
             // SSE3_4/AVX2 don't support abs on long type vector.
@@ -1300,7 +1300,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType
     }
     else
     {
-        assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+        assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported);
         assert(baseType != TYP_LONG);
 
         retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size);
@@ -1428,7 +1428,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID      intrinsicId,
     //        result = result - 2^15 ; readjust it back
 
     if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE ||
-        (getSIMDInstructionSet() >= InstructionSet_SSE3_4 &&
+        (getSIMDSupportLevel() >= SIMD_SSE4_Supported &&
          (baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_CHAR)))
     {
         // SSE2 or SSE4.1 has direct support
@@ -1436,7 +1436,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID      intrinsicId,
     }
     else if (baseType == TYP_CHAR || baseType == TYP_BYTE)
     {
-        assert(getSIMDInstructionSet() == InstructionSet_SSE2);
+        assert(getSIMDSupportLevel() == SIMD_SSE2_Supported);
         int             constVal;
         SIMDIntrinsicID operIntrinsic;
         SIMDIntrinsicID adjustIntrinsic;
@@ -2568,8 +2568,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE                opcode,
 #if defined(_TARGET_XARCH_)
             // Right now dot product is supported only for float/double vectors and
             // int vectors on SSE4/AVX.
-            if (!varTypeIsFloating(baseType) &&
-                !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+            if (!varTypeIsFloating(baseType) && !(baseType == TYP_INT && getSIMDSupportLevel() >= SIMD_SSE4_Supported))
             {
                 return nullptr;
             }
diff --git a/src/jit/simd.h b/src/jit/simd.h
index ff522fd52f..c1650482a5 100644
--- a/src/jit/simd.h
+++ b/src/jit/simd.h
@@ -5,6 +5,43 @@
 #ifndef _SIMD_H_
 #define _SIMD_H_
 
+// Underlying hardware information
+// This type is used to control
+// 1. The length of System.Numerics.Vector<T>.
+// 2. Codegen of System.Numerics.Vectors.
+// 3. Codegen of floating-point arithmetic (VEX-encoded or not).
+//
+// Note
+// - Hardware SIMD support is classified into levels. Do not use
+//   InstructionSet (instr.h) directly for System.Numerics.Vectors.
+// - Values of SIMDLevel are strictly increasing: each SIMD level
+//   is a superset of all previous levels.
+enum SIMDLevel
+{
+    SIMD_Not_Supported = 0,
+#ifdef _TARGET_XARCH_
+    // SSE2 - The min bar of SIMD ISA on x86/x64.
+    // Vector<T> length is 128-bit.
+    // Floating-point instructions are legacy SSE encoded.
+    SIMD_SSE2_Supported = 1,
+
+    // SSE4 - RyuJIT may generate SSE3, SSSE3, SSE4.1 and SSE4.2 instructions for certain intrinsics.
+    // Vector<T> length is 128-bit.
+    // Floating-point instructions are legacy SSE encoded.
+    SIMD_SSE4_Supported = 2,
+
+    // TODO - AVX - Hardware supports AVX instruction set.
+    // TODO - Vector<T> length is 128-bit and SIMD instructions are VEX-128 encoded.
+    // TODO - Floating-point instructions are VEX-128 encoded.
+    SIMD_AVX_Supported = 3,
+
+    // AVX2 - Hardware has the AVX and AVX2 instruction sets.
+    // Vector<T> length is 256-bit and SIMD instructions are VEX-256 encoded.
+    // Floating-point instructions are VEX-128 encoded.
+    SIMD_AVX2_Supported = 4,
+#endif
+};
+
 #ifdef FEATURE_SIMD
 
 #ifdef DEBUG
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index d9ba77589d..4db83b6f8d 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -205,7 +205,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             {
                 result = INS_pmullw;
             }
-            else if ((baseType == TYP_INT) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+            else if ((baseType == TYP_INT) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
             {
                 result = INS_pmulld;
             }
@@ -243,7 +243,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             {
                 result = INS_pminsw;
             }
-            else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+            else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
             {
                 if (baseType == TYP_BYTE)
                 {
@@ -285,7 +285,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             {
                 result = INS_pmaxsw;
             }
-            else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+            else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
             {
                 if (baseType == TYP_BYTE)
                 {
@@ -311,7 +311,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             break;
 
         case SIMDIntrinsicAbs:
-            if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+            if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
             {
                 if (baseType == TYP_INT)
                 {
@@ -354,7 +354,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
                 result = INS_pcmpeqb;
             }
             else if ((baseType == TYP_ULONG || baseType == TYP_LONG) &&
-                     (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+                     (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
             {
                 result = INS_pcmpeqq;
             }
@@ -413,7 +413,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             {
                 result = INS_pcmpgtb;
             }
-            else if ((baseType == TYP_LONG) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
+            else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported))
             {
                 result = INS_pcmpgtq;
             }
@@ -515,7 +515,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             {
                 case TYP_INT:
                 case TYP_UINT:
-                    if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+                    if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
                     {
                         result = INS_packusdw;
                     }
@@ -671,7 +671,7 @@ void CodeGen::genSIMDScalarMove(
     var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
 {
     assert(varTypeIsFloating(baseType));
-    if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+    if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
     {
         switch (moveType)
         {
@@ -784,9 +784,9 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
     var_types baseType  = simdNode->gtSIMDBaseType;
     regNumber targetReg = simdNode->gtRegNum;
     assert(targetReg != REG_NA);
-    var_types      targetType = simdNode->TypeGet();
-    InstructionSet iset       = compiler->getSIMDInstructionSet();
-    unsigned       size       = simdNode->gtSIMDSize;
+    var_types targetType = simdNode->TypeGet();
+    SIMDLevel level      = compiler->getSIMDSupportLevel();
+    unsigned  size       = simdNode->gtSIMDSize;
 
     // Should never see small int base type vectors except for zero initialization.
     noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
@@ -869,7 +869,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
         }
         else
         {
-            assert(iset == InstructionSet_AVX);
+            assert(level == SIMD_AVX2_Supported);
             ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType);
             if (op1->IsCnsFltOrDbl())
             {
@@ -887,7 +887,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
             }
         }
     }
-    else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16)))
+    else if (level == SIMD_AVX2_Supported && ((size == 32) || (size == 16)))
     {
         regNumber srcReg = genConsumeReg(op1);
         if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
@@ -1139,7 +1139,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode)
         getEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000);
         inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
 #else
-        if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+        if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000);
             inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
@@ -1152,7 +1152,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode)
             getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), tmpReg, tmpIntReg, 3);
         }
 #endif
-        if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+        if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             inst_RV_RV(INS_vpbroadcastd, tmpReg, tmpReg, targetType, emitActualTypeSize(targetType));
         }
@@ -1233,13 +1233,13 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
     var_types baseType  = simdNode->gtSIMDBaseType;
     regNumber targetReg = simdNode->gtRegNum;
     assert(targetReg != REG_NA);
-    var_types      simdType  = simdNode->TypeGet();
-    regNumber      op1Reg    = genConsumeReg(op1);
-    regNumber      tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT);
-    regNumber      tmpReg;
-    regNumber      tmpReg2;
-    regNumber      tmpReg3;
-    InstructionSet iset = compiler->getSIMDInstructionSet();
+    var_types simdType  = simdNode->TypeGet();
+    regNumber op1Reg    = genConsumeReg(op1);
+    regNumber tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT);
+    regNumber tmpReg;
+    regNumber tmpReg2;
+    regNumber tmpReg3;
+    SIMDLevel level = compiler->getSIMDSupportLevel();
 
 #ifdef _TARGET_X86_
     if (baseType == TYP_LONG)
@@ -1251,7 +1251,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
     }
     else
 #endif
-        if (iset == InstructionSet_AVX || (baseType == TYP_ULONG))
+        if (level == SIMD_AVX2_Supported || (baseType == TYP_ULONG))
     {
         tmpReg  = simdNode->ExtractTempReg(RBM_ALLFLOAT);
         tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
@@ -1307,7 +1307,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
         getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 #endif
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
         }
@@ -1329,7 +1329,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
         getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 #endif
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
         }
@@ -1351,7 +1351,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
         instruction leftShiftIns  = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             // Extract the high 16-bits
             getEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg, op1Reg, 0x01);
@@ -1389,7 +1389,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
             inst_RV_RV(INS_movaps, targetReg, tmpReg, simdType, emitActualTypeSize(simdType));
         }
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, targetReg, tmpReg2, 0x01);
         }
@@ -1419,7 +1419,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
         getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
         }
@@ -1437,7 +1437,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
         getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType));
         }
@@ -1462,7 +1462,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
         instruction leftShiftIns  = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             // Extract the high 16-bits
             getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, op1Reg, 0x01);
@@ -1498,7 +1498,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
         // Merge or copy the results (only at this point are we done with op1Reg).
         assert(tmpReg != targetReg);
         inst_RV_RV(INS_por, targetReg, tmpReg, simdType, emitActualTypeSize(simdType));
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             getEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, targetReg, tmpReg2, 0x01);
         }
@@ -1520,7 +1520,7 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r
 {
     var_types simdType = simdNode->TypeGet();
     emitAttr  emitSize = emitActualTypeSize(simdType);
-    if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+    if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
     {
         instruction extractIns = varTypeIsFloating(simdNode->gtSIMDBaseType) ? INS_vextractf128 : INS_vextracti128;
         getEmitter()->emitIns_R_R_I(extractIns, EA_32BYTE, tgtReg, srcReg, 0x01);
@@ -1554,8 +1554,8 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
     var_types baseType  = simdNode->gtSIMDBaseType;
     regNumber targetReg = simdNode->gtRegNum;
     assert(targetReg != REG_NA);
-    var_types      simdType = simdNode->TypeGet();
-    InstructionSet iset     = compiler->getSIMDInstructionSet();
+    var_types simdType = simdNode->TypeGet();
+    SIMDLevel level    = compiler->getSIMDSupportLevel();
 
     genConsumeOperands(simdNode);
     regNumber   op1Reg   = op1->gtRegNum;
@@ -1582,7 +1582,7 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode)
         regNumber tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
         assert(tmpReg != op1Reg);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             // permute op1Reg and put it into targetReg
             unsigned ival = 0xd4;
@@ -1627,9 +1627,9 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
     var_types baseType  = simdNode->gtSIMDBaseType;
     regNumber targetReg = simdNode->gtRegNum;
     assert(targetReg != REG_NA);
-    var_types      simdType = simdNode->TypeGet();
-    emitAttr       emitSize = emitTypeSize(simdType);
-    InstructionSet iset     = compiler->getSIMDInstructionSet();
+    var_types simdType = simdNode->TypeGet();
+    emitAttr  emitSize = emitTypeSize(simdType);
+    SIMDLevel level    = compiler->getSIMDSupportLevel();
 
     genConsumeOperands(simdNode);
     regNumber op1Reg = op1->gtRegNum;
@@ -1652,7 +1652,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
     }
     else if (varTypeIsLong(baseType))
     {
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             // We have 8 long elements, 0-3 in op1Reg, 4-7 in op2Reg.
             // We will generate the following:
@@ -1714,7 +1714,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
         instruction shiftLeftIns  = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
         instruction shiftRightIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
 
-        if (iset == InstructionSet_AVX)
+        if (level == SIMD_AVX2_Supported)
         {
             regNumber tmpReg  = simdNode->ExtractTempReg(RBM_ALLFLOAT);
             regNumber tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT);
@@ -1738,7 +1738,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode)
             inst_RV_RV(ins_Copy(simdType), tmpReg, op2Reg, simdType, emitSize);
 
             instruction tmpShiftRight = shiftRightIns;
-            if ((baseType == TYP_INT || baseType == TYP_UINT) && iset == InstructionSet_SSE2)
+            if ((baseType == TYP_INT || baseType == TYP_UINT) && level == SIMD_SSE2_Supported)
             {
                 tmpShiftRight = INS_psrad;
             }
@@ -1778,8 +1778,8 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
     var_types baseType  = simdNode->gtSIMDBaseType;
     regNumber targetReg = simdNode->gtRegNum;
     assert(targetReg != REG_NA);
-    var_types      targetType = simdNode->TypeGet();
-    InstructionSet iset       = compiler->getSIMDInstructionSet();
+    var_types targetType = simdNode->TypeGet();
+    SIMDLevel level      = compiler->getSIMDSupportLevel();
 
     genConsumeOperands(simdNode);
     regNumber op1Reg   = op1->gtRegNum;
@@ -1790,7 +1790,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
     // SSE2 doesn't have an instruction to perform this operation directly
     // whereas SSE4.1 does (pmulld).  This is special cased and computed
     // as follows.
-    if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2)
+    if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && level == SIMD_SSE2_Supported)
     {
         // We need a temporary register that is NOT the same as the target,
         // and we MAY need another.
@@ -1952,12 +1952,12 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
 //
 void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
 {
-    GenTree*       op1        = simdNode->gtGetOp1();
-    GenTree*       op2        = simdNode->gtGetOp2();
-    var_types      baseType   = simdNode->gtSIMDBaseType;
-    regNumber      targetReg  = simdNode->gtRegNum;
-    var_types      targetType = simdNode->TypeGet();
-    InstructionSet iset       = compiler->getSIMDInstructionSet();
+    GenTree*  op1        = simdNode->gtGetOp1();
+    GenTree*  op2        = simdNode->gtGetOp2();
+    var_types baseType   = simdNode->gtSIMDBaseType;
+    regNumber targetReg  = simdNode->gtRegNum;
+    var_types targetType = simdNode->TypeGet();
+    SIMDLevel level      = compiler->getSIMDSupportLevel();
 
     genConsumeOperands(simdNode);
     regNumber op1Reg   = op1->gtRegNum;
@@ -1976,7 +1976,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
             // TYP_INT comparison operations
             if (baseType == TYP_LONG || baseType == TYP_ULONG)
             {
-                assert(iset >= InstructionSet_SSE3_4);
+                assert(level >= SIMD_SSE4_Supported);
             }
 #endif
 
@@ -2063,7 +2063,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
             // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest.
             if (op2->isContained())
             {
-                assert((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0));
+                assert((compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0));
                 inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType));
             }
             else
@@ -2175,7 +2175,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
     regNumber tmpReg1 = REG_NA;
     regNumber tmpReg2 = REG_NA;
 
-    InstructionSet iset = compiler->getSIMDInstructionSet();
+    SIMDLevel level = compiler->getSIMDSupportLevel();
 
     // Dot product intrinsic is supported only on float/double vectors
     // and 32-byte int vectors on AVX.
@@ -2192,7 +2192,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
     // different from targetReg as scratch.
     if (varTypeIsFloating(baseType))
     {
-        if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+        if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) || (simdEvalType == TYP_SIMD32))
         {
             tmpReg1 = simdNode->GetSingleTempReg();
             assert(tmpReg1 != targetReg);
@@ -2205,9 +2205,9 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
     else
     {
         assert(baseType == TYP_INT);
-        assert(iset >= InstructionSet_SSE3_4);
+        assert(level >= SIMD_SSE4_Supported);
 
-        if (iset == InstructionSet_SSE3_4)
+        if (level == SIMD_SSE4_Supported)
         {
             tmpReg1 = simdNode->GetSingleTempReg();
         }
@@ -2218,7 +2218,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
         }
     }
 
-    if (iset == InstructionSet_SSE2)
+    if (level == SIMD_SSE2_Supported)
     {
         // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
         if (op1Reg == targetReg)
@@ -2294,7 +2294,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
     }
     else
     {
-        assert(iset >= InstructionSet_SSE3_4);
+        assert(level >= SIMD_SSE4_Supported);
 
         if (varTypeIsFloating(baseType))
         {
@@ -2349,7 +2349,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
                 {
                     // On AVX, we have no 16-byte vectors of double.  Note that, if we did, we could use
                     // dppd directly.
-                    assert(iset == InstructionSet_SSE3_4);
+                    assert(level == SIMD_SSE4_Supported);
                     inst_RV_RV_IV(INS_dppd, emitSize, targetReg, op2Reg, 0x31);
                 }
             }
@@ -2375,7 +2375,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
 #endif
 
             // tmpReg1 = op1 * op2
-            if (iset == InstructionSet_AVX)
+            if (level == SIMD_AVX2_Supported)
             {
                 // On AVX take advantage 3 operand form of pmulld
                 inst_RV_RV_RV(INS_pmulld, tmpReg1, op1Reg, op2Reg, emitTypeSize(simdEvalType));
@@ -2572,7 +2572,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
 
     if (byteShiftCnt >= 16)
     {
-        assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+        assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
         byteShiftCnt -= 16;
         regNumber newSrcReg;
         if (varTypeIsFloating(baseType))
@@ -2632,7 +2632,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
             // low 3 bits of index, but it's better to use the right value.
             if (index > 8)
             {
-                assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+                assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
                 index -= 8;
             }
 
@@ -2764,7 +2764,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
     // logic needs modification.
     noway_assert(baseType == TYP_FLOAT);
 
-    if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+    if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
     {
         // We need one additional int register as scratch
         regNumber tmpReg = simdNode->GetSingleTempReg();
@@ -2804,7 +2804,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
 void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
 {
     assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2);
-    noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2);
+    noway_assert(compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported);
 
     GenTree* op1 = simdNode->gtGetOp1();
     GenTree* op2 = simdNode->gtGetOp2();
author	Fei Peng <fei.peng@intel.com>	2017-10-24 14:35:18 -0700
committer	Fei Peng <fei.peng@intel.com>	2017-10-25 15:10:06 -0700
commit	9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e (patch)
tree	de768274d3ec8f1d9ce05491c987fd26de6a656e /src
parent	15e1472aab7ed42dafde0331ed6cece465a0763d (diff)
download	coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.gz coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.bz2 coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.zip