diff options
author | Fei Peng <fei.peng@intel.com> | 2017-10-24 14:35:18 -0700 |
---|---|---|
committer | Fei Peng <fei.peng@intel.com> | 2017-10-25 15:10:06 -0700 |
commit | 9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e (patch) | |
tree | de768274d3ec8f1d9ce05491c987fd26de6a656e /src | |
parent | 15e1472aab7ed42dafde0331ed6cece465a0763d (diff) | |
download | coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.gz coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.tar.bz2 coreclr-9dfc3cf5e90a98c204669838b5f9e14baa1d1e3e.zip |
Decoupling SIMD levels from instruction sets
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/codegencommon.cpp | 2 |
-rw-r--r-- | src/jit/codegenxarch.cpp | 2 |
-rw-r--r-- | src/jit/compiler.h | 20 |
-rw-r--r-- | src/jit/instr.h | 16 |
-rw-r--r-- | src/jit/lowerxarch.cpp | 4 |
-rw-r--r-- | src/jit/lsraxarch.cpp | 24 |
-rw-r--r-- | src/jit/simd.cpp | 17 |
-rw-r--r-- | src/jit/simd.h | 37 |
-rw-r--r-- | src/jit/simdcodegenxarch.cpp | 128 |
9 files changed, 140 insertions, 110 deletions
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 64f38ae080..18b42592e8 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -11143,7 +11143,7 @@ void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/) if (emitVzeroUpper) { - assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX); + assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported); instGen(INS_vzeroupper); } } diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index f1730b9999..778bcfa309 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -5360,7 +5360,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // when there's preceding 256-bit AVX to legacy SSE transition penalty. if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX()) { - assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX); + assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported); instGen(INS_vzeroupper); } diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 6a64a7b648..629129baef 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -7352,39 +7352,39 @@ private: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ - // Get highest available instruction set for floating point codegen - InstructionSet getFloatingPointInstructionSet() + // Get highest available level for floating point codegen + SIMDLevel getFloatingPointCodegenLevel() { #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) if (canUseAVX()) { - return InstructionSet_AVX; + return SIMD_AVX2_Supported; } if (CanUseSSE3_4()) { - return InstructionSet_SSE3_4; + return SIMD_SSE4_Supported; } // min bar is SSE2 assert(canUseSSE2()); - return InstructionSet_SSE2; + return SIMD_SSE2_Supported; #else assert(!"getFPInstructionSet() is not implemented for target arch"); unreached(); - return InstructionSet_NONE; + return SIMD_Not_Supported; #endif } - // Get highest available 
instruction set for SIMD codegen - InstructionSet getSIMDInstructionSet() + // Get highest available level for SIMD codegen + SIMDLevel getSIMDSupportLevel() { #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) - return getFloatingPointInstructionSet(); + return getFloatingPointCodegenLevel(); #else assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch"); unreached(); - return InstructionSet_NONE; + return SIMD_Not_Supported; #endif } diff --git a/src/jit/instr.h b/src/jit/instr.h index eac425634c..ec58f61458 100644 --- a/src/jit/instr.h +++ b/src/jit/instr.h @@ -277,23 +277,19 @@ enum emitAttr : unsigned enum InstructionSet { #ifdef _TARGET_XARCH_ - // Linear order start InstructionSet_ILLEGAL = 0, + // Start linear order SIMD instruction sets + // These ISAs have strictly generation to generation order. InstructionSet_SSE = 1, InstructionSet_SSE2 = 2, InstructionSet_SSE3 = 3, InstructionSet_SSSE3 = 4, InstructionSet_SSE41 = 5, InstructionSet_SSE42 = 6, - InstructionSet_SSE3_4 = 7, // SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set - InstructionSet_AVX = 8, - InstructionSet_AVX2 = 9, - // Linear order end - // TODO - Instruction sets have the linear order only in above area. - // We should no long compare the return value of getSIMDInstructionSet() - // or getFloatingPointInstructionSet() to the InstructionSet values. - // Should refactor SIMD code only to be aware of SIMD feature levels - // (SSE2, SSE3_4, AVX, and AVX2, etc.) rather than concrete ISA. + InstructionSet_AVX = 7, + InstructionSet_AVX2 = 8, + // Reserve values <32 for future SIMD instruction sets (i.e., AVX512), + // End linear order SIMD instruction sets. 
InstructionSet_AES = 32, InstructionSet_BMI1 = 33, diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index f509475fe1..65a1ef3661 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2207,7 +2207,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { MakeSrcContained(simdNode, op1); } - else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) && + else if ((comp->getSIMDSupportLevel() == SIMD_AVX2_Supported) && ((simdNode->gtSIMDSize == 16) || (simdNode->gtSIMDSize == 32))) { // Either op1 is a float or dbl constant or an addr @@ -2231,7 +2231,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // for integral vectors but not for floating-point for the reason // that we have +0.0 and -0.0 and +0.0 == -0.0 op2 = simdNode->gtGetOp2(); - if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0)) + if ((comp->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)) { MakeSrcContained(simdNode, op2); } diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index 53aebe00bf..9b806dcd99 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2108,7 +2108,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) // Must be a Vector<int> or Vector<short> Vector<sbyte> assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT || simdTree->gtSIMDBaseType == TYP_BYTE); - assert(compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4); + assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); info->srcCount = 1; break; @@ -2131,7 +2131,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) // SSE2 32-bit integer multiplication requires two temp regs if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT && - compiler->getSIMDInstructionSet() == InstructionSet_SSE2) + compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) { info->internalFloatCount = 2; 
info->setInternalCandidates(this, allSIMDRegs()); @@ -2198,7 +2198,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) // and the need for scratch registers. if (varTypeIsFloating(simdTree->gtSIMDBaseType)) { - if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || + if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) || (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32)) { info->internalFloatCount = 1; @@ -2209,8 +2209,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) } else { - assert(simdTree->gtSIMDBaseType == TYP_INT && - compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4); + assert(simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); // No need to set isInternalRegDelayFree since targetReg is a // an int type reg and guaranteed to be different from xmm/ymm @@ -2268,7 +2267,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) { bool needFloatTemp; if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) && - (compiler->getSIMDInstructionSet() == InstructionSet_AVX)) + (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)) { int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType); needFloatTemp = (byteShiftCnt >= 16); @@ -2295,7 +2294,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) info->srcCount = 2; // We need an internal integer register for SSE2 codegen - if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2) + if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) { info->internalIntCount = 1; info->setInternalCandidates(this, allRegs(TYP_INT)); @@ -2342,7 +2341,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) info->isInternalRegDelayFree = true; info->srcCount = 1; info->internalIntCount = 1; - if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { info->internalFloatCount = 2; } @@ -2365,8 +2364,7 @@ 
void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) } else #endif - if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) || - (simdTree->gtSIMDBaseType == TYP_ULONG)) + if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) || (simdTree->gtSIMDBaseType == TYP_ULONG)) { info->internalFloatCount = 2; } @@ -2381,7 +2379,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree) // We need an internal register different from targetReg. info->isInternalRegDelayFree = true; info->srcCount = 2; - if ((compiler->getSIMDInstructionSet() == InstructionSet_AVX) && (simdTree->gtSIMDBaseType != TYP_DOUBLE)) + if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE)) { info->internalFloatCount = 2; } @@ -2767,11 +2765,11 @@ void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsi { if (isFloatingPointType) { - if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX) + if (compiler->getFloatingPointCodegenLevel() == SIMD_AVX2_Supported) { compiler->getEmitter()->SetContainsAVX(true); } - if (sizeOfSIMDVector == 32 && compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (sizeOfSIMDVector == 32 && compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { compiler->getEmitter()->SetContains256bitAVX(true); } diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index d190cd29f4..b85d97c1ac 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -940,7 +940,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual( // This routine should be used only for integer base type vectors assert(varTypeIsIntegral(baseType)); - if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE)) + if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE)) { return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); } @@ -1038,7 +1038,7 @@ SIMDIntrinsicID 
Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, : SIMDIntrinsicGreaterThanOrEqual; } - if ((getSIMDInstructionSet() == InstructionSet_SSE2) && baseType == TYP_LONG) + if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG) { // There is no direct SSE2 support for comparing TYP_LONG vectors. // These have to be implemented interms of TYP_INT vector comparison operations. @@ -1187,7 +1187,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType // result = ConditionalSelect(BitVector, vector.Zero - v, v) bool useConditionalSelect = false; - if (getSIMDInstructionSet() == InstructionSet_SSE2) + if (getSIMDSupportLevel() == SIMD_SSE2_Supported) { // SSE2 doesn't support abs on signed integer type vectors. if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE) @@ -1197,7 +1197,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType } else { - assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4); + assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); if (baseType == TYP_LONG) { // SSE3_4/AVX2 don't support abs on long type vector. 
@@ -1300,7 +1300,7 @@ GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType } else { - assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4); + assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); assert(baseType != TYP_LONG); retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); @@ -1428,7 +1428,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, // result = result - 2^15 ; readjust it back if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE || - (getSIMDInstructionSet() >= InstructionSet_SSE3_4 && + (getSIMDSupportLevel() >= SIMD_SSE4_Supported && (baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_CHAR))) { // SSE2 or SSE4.1 has direct support @@ -1436,7 +1436,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, } else if (baseType == TYP_CHAR || baseType == TYP_BYTE) { - assert(getSIMDInstructionSet() == InstructionSet_SSE2); + assert(getSIMDSupportLevel() == SIMD_SSE2_Supported); int constVal; SIMDIntrinsicID operIntrinsic; SIMDIntrinsicID adjustIntrinsic; @@ -2568,8 +2568,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, #if defined(_TARGET_XARCH_) // Right now dot product is supported only for float/double vectors and // int vectors on SSE4/AVX. - if (!varTypeIsFloating(baseType) && - !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4)) + if (!varTypeIsFloating(baseType) && !(baseType == TYP_INT && getSIMDSupportLevel() >= SIMD_SSE4_Supported)) { return nullptr; } diff --git a/src/jit/simd.h b/src/jit/simd.h index ff522fd52f..c1650482a5 100644 --- a/src/jit/simd.h +++ b/src/jit/simd.h @@ -5,6 +5,43 @@ #ifndef _SIMD_H_ #define _SIMD_H_ +// Underlying hardware information +// This type is used to control +// 1. The length of System.Numerics.Vector<T>. +// 2. Codegen of System.Numerics.Vectors. +// 3. Codegen of floating-point arithmetics (VEX-encoding or not). 
+// +// Note +// - Hardware SIMD support is classified to the levels. Do not directly use +// InstructionSet (instr.h) for System.Numerics.Vectors. +// - Values of SIMDLevel have strictly increasing order that each SIMD level +// is a superset of the previous levels. +enum SIMDLevel +{ + SIMD_Not_Supported = 0, +#ifdef _TARGET_XARCH_ + // SSE2 - The min bar of SIMD ISA on x86/x64. + // Vector<T> length is 128-bit. + // Floating-point instructions are legacy SSE encoded. + SIMD_SSE2_Supported = 1, + + // SSE4 - RyuJIT may generate SSE3, SSSE3, SSE4.1 and SSE4.2 instructions for certain intrinsics. + // Vector<T> length is 128-bit. + // Floating-point instructions are legacy SSE encoded. + SIMD_SSE4_Supported = 2, + + // TODO - AVX - Hardware supports AVX instruction set. + // TODO - Vector<T> length is 128-bit and SIMD instructions are VEX-128 encoded. + // TODO - Floating-point instructions are VEX-128 encoded. + SIMD_AVX_Supported = 3, + + // AVX2 - Hardware has AVX and AVX2 instruction set. + // Vector<T> length is 256-bit and SIMD instructions are VEX-256 encoded. + // Floating-point instructions are VEX-128 encoded. 
+ SIMD_AVX2_Supported = 4, +#endif +}; + #ifdef FEATURE_SIMD #ifdef DEBUG diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index d9ba77589d..4db83b6f8d 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -205,7 +205,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pmullw; } - else if ((baseType == TYP_INT) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) + else if ((baseType == TYP_INT) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) { result = INS_pmulld; } @@ -243,7 +243,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pminsw; } - else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) + else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) { if (baseType == TYP_BYTE) { @@ -285,7 +285,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pmaxsw; } - else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) + else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) { if (baseType == TYP_BYTE) { @@ -311,7 +311,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type break; case SIMDIntrinsicAbs: - if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) + if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) { if (baseType == TYP_INT) { @@ -354,7 +354,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type result = INS_pcmpeqb; } else if ((baseType == TYP_ULONG || baseType == TYP_LONG) && - (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) + (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) { result = INS_pcmpeqq; } @@ -413,7 +413,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pcmpgtb; } - else if ((baseType == TYP_LONG) && 
(compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) + else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) { result = INS_pcmpgtq; } @@ -515,7 +515,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { case TYP_INT: case TYP_UINT: - if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) + if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) { result = INS_packusdw; } @@ -671,7 +671,7 @@ void CodeGen::genSIMDScalarMove( var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType) { assert(varTypeIsFloating(baseType)); - if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { switch (moveType) { @@ -784,9 +784,9 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) var_types baseType = simdNode->gtSIMDBaseType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - var_types targetType = simdNode->TypeGet(); - InstructionSet iset = compiler->getSIMDInstructionSet(); - unsigned size = simdNode->gtSIMDSize; + var_types targetType = simdNode->TypeGet(); + SIMDLevel level = compiler->getSIMDSupportLevel(); + unsigned size = simdNode->gtSIMDSize; // Should never see small int base type vectors except for zero initialization. 
noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0)); @@ -869,7 +869,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) } else { - assert(iset == InstructionSet_AVX); + assert(level == SIMD_AVX2_Supported); ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType); if (op1->IsCnsFltOrDbl()) { @@ -887,7 +887,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) } } } - else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16))) + else if (level == SIMD_AVX2_Supported && ((size == 32) || (size == 16))) { regNumber srcReg = genConsumeReg(op1); if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG) @@ -1139,7 +1139,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) getEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000); inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG); #else - if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000); inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); @@ -1152,7 +1152,7 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), tmpReg, tmpIntReg, 3); } #endif - if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { inst_RV_RV(INS_vpbroadcastd, tmpReg, tmpReg, targetType, emitActualTypeSize(targetType)); } @@ -1233,13 +1233,13 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) var_types baseType = simdNode->gtSIMDBaseType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - var_types simdType = simdNode->TypeGet(); - regNumber op1Reg = genConsumeReg(op1); - regNumber tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT); - regNumber tmpReg; - regNumber tmpReg2; - 
regNumber tmpReg3; - InstructionSet iset = compiler->getSIMDInstructionSet(); + var_types simdType = simdNode->TypeGet(); + regNumber op1Reg = genConsumeReg(op1); + regNumber tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT); + regNumber tmpReg; + regNumber tmpReg2; + regNumber tmpReg3; + SIMDLevel level = compiler->getSIMDSupportLevel(); #ifdef _TARGET_X86_ if (baseType == TYP_LONG) @@ -1251,7 +1251,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) } else #endif - if (iset == InstructionSet_AVX || (baseType == TYP_ULONG)) + if (level == SIMD_AVX2_Supported || (baseType == TYP_ULONG)) { tmpReg = simdNode->ExtractTempReg(RBM_ALLFLOAT); tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); @@ -1307,7 +1307,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType)); } @@ -1329,7 +1329,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType)); } @@ -1351,7 +1351,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); instruction leftShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { // Extract the high 16-bits getEmitter()->emitIns_R_R_I(INS_vextracti128, EA_32BYTE, tmpReg, op1Reg, 0x01); @@ -1389,7 +1389,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* 
simdNode) inst_RV_RV(INS_movaps, targetReg, tmpReg, simdType, emitActualTypeSize(simdType)); } - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, targetReg, tmpReg2, 0x01); } @@ -1419,7 +1419,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType)); } @@ -1437,7 +1437,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); getEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { inst_RV_RV(INS_vpbroadcastq, tmpReg, tmpReg, simdType, emitActualTypeSize(simdType)); } @@ -1462,7 +1462,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) instruction rightShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); instruction leftShiftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { // Extract the high 16-bits getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, op1Reg, 0x01); @@ -1498,7 +1498,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // Merge or copy the results (only at this point are we done with op1Reg). 
assert(tmpReg != targetReg); inst_RV_RV(INS_por, targetReg, tmpReg, simdType, emitActualTypeSize(simdType)); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { getEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, targetReg, tmpReg2, 0x01); } @@ -1520,7 +1520,7 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r { var_types simdType = simdNode->TypeGet(); emitAttr emitSize = emitActualTypeSize(simdType); - if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) + if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { instruction extractIns = varTypeIsFloating(simdNode->gtSIMDBaseType) ? INS_vextractf128 : INS_vextracti128; getEmitter()->emitIns_R_R_I(extractIns, EA_32BYTE, tgtReg, srcReg, 0x01); @@ -1554,8 +1554,8 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) var_types baseType = simdNode->gtSIMDBaseType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - var_types simdType = simdNode->TypeGet(); - InstructionSet iset = compiler->getSIMDInstructionSet(); + var_types simdType = simdNode->TypeGet(); + SIMDLevel level = compiler->getSIMDSupportLevel(); genConsumeOperands(simdNode); regNumber op1Reg = op1->gtRegNum; @@ -1582,7 +1582,7 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) regNumber tmpReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); assert(tmpReg != op1Reg); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { // permute op1Reg and put it into targetReg unsigned ival = 0xd4; @@ -1627,9 +1627,9 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) var_types baseType = simdNode->gtSIMDBaseType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - var_types simdType = simdNode->TypeGet(); - emitAttr emitSize = emitTypeSize(simdType); - InstructionSet iset = compiler->getSIMDInstructionSet(); + var_types simdType = simdNode->TypeGet(); + emitAttr emitSize = emitTypeSize(simdType); + SIMDLevel level = 
compiler->getSIMDSupportLevel(); genConsumeOperands(simdNode); regNumber op1Reg = op1->gtRegNum; @@ -1652,7 +1652,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) } else if (varTypeIsLong(baseType)) { - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { // We have 8 long elements, 0-3 in op1Reg, 4-7 in op2Reg. // We will generate the following: @@ -1714,7 +1714,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) instruction shiftLeftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType); instruction shiftRightIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType); - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { regNumber tmpReg = simdNode->ExtractTempReg(RBM_ALLFLOAT); regNumber tmpReg2 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); @@ -1738,7 +1738,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) inst_RV_RV(ins_Copy(simdType), tmpReg, op2Reg, simdType, emitSize); instruction tmpShiftRight = shiftRightIns; - if ((baseType == TYP_INT || baseType == TYP_UINT) && iset == InstructionSet_SSE2) + if ((baseType == TYP_INT || baseType == TYP_UINT) && level == SIMD_SSE2_Supported) { tmpShiftRight = INS_psrad; } @@ -1778,8 +1778,8 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) var_types baseType = simdNode->gtSIMDBaseType; regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - var_types targetType = simdNode->TypeGet(); - InstructionSet iset = compiler->getSIMDInstructionSet(); + var_types targetType = simdNode->TypeGet(); + SIMDLevel level = compiler->getSIMDSupportLevel(); genConsumeOperands(simdNode); regNumber op1Reg = op1->gtRegNum; @@ -1790,7 +1790,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // SSE2 doesn't have an instruction to perform this operation directly // whereas SSE4.1 does (pmulld). This is special cased and computed // as follows. 
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2) + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && level == SIMD_SSE2_Supported) { // We need a temporary register that is NOT the same as the target, // and we MAY need another. @@ -1952,12 +1952,12 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->gtRegNum; - var_types targetType = simdNode->TypeGet(); - InstructionSet iset = compiler->getSIMDInstructionSet(); + GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op2 = simdNode->gtGetOp2(); + var_types baseType = simdNode->gtSIMDBaseType; + regNumber targetReg = simdNode->gtRegNum; + var_types targetType = simdNode->TypeGet(); + SIMDLevel level = compiler->getSIMDSupportLevel(); genConsumeOperands(simdNode); regNumber op1Reg = op1->gtRegNum; @@ -1976,7 +1976,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // TYP_INT comparison operations if (baseType == TYP_LONG || baseType == TYP_ULONG) { - assert(iset >= InstructionSet_SSE3_4); + assert(level >= SIMD_SSE4_Supported); } #endif @@ -2063,7 +2063,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest. 
if (op2->isContained()) { - assert((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0)); + assert((compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)); inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType)); } else @@ -2175,7 +2175,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) regNumber tmpReg1 = REG_NA; regNumber tmpReg2 = REG_NA; - InstructionSet iset = compiler->getSIMDInstructionSet(); + SIMDLevel level = compiler->getSIMDSupportLevel(); // Dot product intrinsic is supported only on float/double vectors // and 32-byte int vectors on AVX. @@ -2192,7 +2192,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // different from targetReg as scratch. if (varTypeIsFloating(baseType)) { - if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32)) + if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) || (simdEvalType == TYP_SIMD32)) { tmpReg1 = simdNode->GetSingleTempReg(); assert(tmpReg1 != targetReg); @@ -2205,9 +2205,9 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) else { assert(baseType == TYP_INT); - assert(iset >= InstructionSet_SSE3_4); + assert(level >= SIMD_SSE4_Supported); - if (iset == InstructionSet_SSE3_4) + if (level == SIMD_SSE4_Supported) { tmpReg1 = simdNode->GetSingleTempReg(); } @@ -2218,7 +2218,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) } } - if (iset == InstructionSet_SSE2) + if (level == SIMD_SSE2_Supported) { // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg if (op1Reg == targetReg) @@ -2294,7 +2294,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) } else { - assert(iset >= InstructionSet_SSE3_4); + assert(level >= SIMD_SSE4_Supported); if (varTypeIsFloating(baseType)) { @@ -2349,7 +2349,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) { // 
On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use // dppd directly. - assert(iset == InstructionSet_SSE3_4); + assert(level == SIMD_SSE4_Supported); inst_RV_RV_IV(INS_dppd, emitSize, targetReg, op2Reg, 0x31); } } @@ -2375,7 +2375,7 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) #endif // tmpReg1 = op1 * op2 - if (iset == InstructionSet_AVX) + if (level == SIMD_AVX2_Supported) { // On AVX take advantage 3 operand form of pmulld inst_RV_RV_RV(INS_pmulld, tmpReg1, op1Reg, op2Reg, emitTypeSize(simdEvalType)); @@ -2572,7 +2572,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) if (byteShiftCnt >= 16) { - assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX); + assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported); byteShiftCnt -= 16; regNumber newSrcReg; if (varTypeIsFloating(baseType)) @@ -2632,7 +2632,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // low 3 bits of index, but it's better to use the right value. if (index > 8) { - assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX); + assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported); index -= 8; } @@ -2764,7 +2764,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) // logic needs modification. 
noway_assert(baseType == TYP_FLOAT); - if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2) + if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) { // We need one additional int register as scratch regNumber tmpReg = simdNode->GetSingleTempReg(); @@ -2804,7 +2804,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) { assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2); - noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2); + noway_assert(compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); |