summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Carol Eidt <carol.eidt@microsoft.com>, 2017-12-13 10:23:18 -0800
committer: GitHub <noreply@github.com>, 2017-12-13 10:23:18 -0800
commit: 1971e79c5be185825a23b3a1f0c7bb950c14029d (patch)
tree: 4f175b9b2bdd8fc25d3c42284b75e364da2b1b68
parent: 0c4736b5a37dc1d81ad764296243cddfe346d8b8 (diff)
parent: 66658e6cd082e6456330b163a1c1d7ab830f5908 (diff)
download: coreclr-1971e79c5be185825a23b3a1f0c7bb950c14029d.tar.gz
coreclr-1971e79c5be185825a23b3a1f0c7bb950c14029d.tar.bz2
coreclr-1971e79c5be185825a23b3a1f0c7bb950c14029d.zip
Merge pull request #15244 from fiigii/vector
Enable Vector128/256<T> and Add intrinsics
-rw-r--r-- src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h | 4
-rw-r--r-- src/ToolBox/superpmi/superpmi-shared/lwmlist.h | 1
-rw-r--r-- src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp | 23
-rw-r--r-- src/ToolBox/superpmi/superpmi-shared/methodcontext.h | 7
-rw-r--r-- src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp | 10
-rw-r--r-- src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp | 8
-rw-r--r-- src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp | 7
-rw-r--r-- src/ToolBox/superpmi/superpmi/icorjitinfo.cpp | 8
-rw-r--r-- src/inc/corinfo.h | 17
-rw-r--r-- src/jit/ICorJitInfo_API_names.h | 1
-rw-r--r-- src/jit/ICorJitInfo_API_wrapper.hpp | 9
-rw-r--r-- src/jit/codegenlinear.h | 2
-rw-r--r-- src/jit/compiler.cpp | 22
-rw-r--r-- src/jit/compiler.h | 27
-rw-r--r-- src/jit/emitxarch.cpp | 18
-rw-r--r-- src/jit/emitxarch.h | 4
-rw-r--r-- src/jit/gentree.cpp | 6
-rw-r--r-- src/jit/gentree.h | 22
-rw-r--r-- src/jit/hwintrinsiccodegenxarch.cpp | 177
-rw-r--r-- src/jit/hwintrinsiclistxarch.h | 4
-rw-r--r-- src/jit/hwintrinsicxarch.cpp | 193
-rw-r--r-- src/jit/importer.cpp | 23
-rw-r--r-- src/jit/lowerxarch.cpp | 2
-rw-r--r-- src/jit/morph.cpp | 2
-rw-r--r-- src/jit/simd.cpp | 651
-rw-r--r-- src/vm/jitinterface.cpp | 43
-rw-r--r-- src/vm/jitinterface.h | 6
-rw-r--r-- src/zap/zapinfo.cpp | 5
-rw-r--r-- src/zap/zapinfo.h | 1
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/Add.cs | 428
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/Add_r.csproj | 34
-rw-r--r-- tests/src/JIT/HardwareIntrinsics/Add_ro.csproj | 34
32 files changed, 1567 insertions, 232 deletions
diff --git a/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h b/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h
index e0a5886e76..61d1785b51 100644
--- a/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h
+++ b/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h
@@ -430,6 +430,10 @@ CORINFO_CLASS_HANDLE getBuiltinClass(CorInfoClassId classId);
// "System.Int32" ==> CORINFO_TYPE_INT..
CorInfoType getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE cls);
+// "System.Int32" ==> CORINFO_TYPE_INT..
+// "System.UInt32" ==> CORINFO_TYPE_UINT..
+CorInfoType getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls);
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL canCast(CORINFO_CLASS_HANDLE child, // subtype (extends parent)
diff --git a/src/ToolBox/superpmi/superpmi-shared/lwmlist.h b/src/ToolBox/superpmi/superpmi-shared/lwmlist.h
index 467d28ac55..6a8b77e246 100644
--- a/src/ToolBox/superpmi/superpmi-shared/lwmlist.h
+++ b/src/ToolBox/superpmi/superpmi-shared/lwmlist.h
@@ -126,6 +126,7 @@ LWM(GetThreadTLSIndex, DWORD, DLD)
LWM(GetTokenTypeAsHandle, GetTokenTypeAsHandleValue, DWORDLONG)
LWM(GetTypeForBox, DWORDLONG, DWORDLONG)
LWM(GetTypeForPrimitiveValueClass, DWORDLONG, DWORD)
+LWM(GetTypeForPrimitiveNumericClass, DWORDLONG, DWORD)
LWM(GetUnboxedEntry, DWORDLONG, DLD);
LWM(GetUnBoxHelper, DWORDLONG, DWORD)
LWM(GetUnmanagedCallConv, DWORDLONG, DWORD)
diff --git a/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
index 601f73833e..4a16baf5b7 100644
--- a/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
@@ -1916,6 +1916,29 @@ CorInfoType MethodContext::repGetTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE
return result;
}
+// Stores the CorInfoType obtained for `cls` in the GetTypeForPrimitiveNumericClass map,
+// where repGetTypeForPrimitiveNumericClass looks it up later.
+void MethodContext::recGetTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls, CorInfoType result)
+{
+    const DWORDLONG key = (DWORDLONG)cls;
+
+    // The map is allocated on the first recorded call.
+    if (GetTypeForPrimitiveNumericClass == nullptr)
+    {
+        GetTypeForPrimitiveNumericClass = new LightWeightMap<DWORDLONG, DWORD>();
+    }
+
+    GetTypeForPrimitiveNumericClass->Add(key, result);
+    DEBUG_REC(dmpGetTypeForPrimitiveNumericClass(key, (DWORD)result));
+}
+// Prints one (class handle -> CorInfoType) entry; the value is shown both as a
+// number and as the name produced by toString.
+void MethodContext::dmpGetTypeForPrimitiveNumericClass(DWORDLONG key, DWORD value)
+{
+    const CorInfoType cit = (CorInfoType)value;
+    printf("GetTypeForPrimitiveNumericClass key cls-%016llX, value cit-%u(%s)", key, value, toString(cit));
+}
+// Returns the CorInfoType previously stored for `cls`.
+// AssertCodeMsg is raised with EXCEPTIONCODE_MC when the map was never created
+// or holds no entry for this handle.
+CorInfoType MethodContext::repGetTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    const DWORDLONG key = (DWORDLONG)cls;
+
+    AssertCodeMsg(GetTypeForPrimitiveNumericClass != nullptr, EXCEPTIONCODE_MC,
+                  "Encountered an empty LWM while looking for %016llX", key);
+    AssertCodeMsg(GetTypeForPrimitiveNumericClass->GetIndex(key) != -1, EXCEPTIONCODE_MC,
+                  "Didn't find %016llX", key);
+
+    CorInfoType result = (CorInfoType)GetTypeForPrimitiveNumericClass->Get(key);
+    DEBUG_REP(dmpGetTypeForPrimitiveNumericClass(key, (DWORD)result));
+    return result;
+}
+
void MethodContext::recGetParentType(CORINFO_CLASS_HANDLE cls, CORINFO_CLASS_HANDLE result)
{
if (GetParentType == nullptr)
diff --git a/src/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/ToolBox/superpmi/superpmi-shared/methodcontext.h
index 1174f070c7..abcfd4926e 100644
--- a/src/ToolBox/superpmi/superpmi-shared/methodcontext.h
+++ b/src/ToolBox/superpmi/superpmi-shared/methodcontext.h
@@ -728,6 +728,10 @@ public:
void dmpGetTypeForPrimitiveValueClass(DWORDLONG key, DWORD value);
CorInfoType repGetTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE cls);
+ void recGetTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls, CorInfoType result);
+ void dmpGetTypeForPrimitiveNumericClass(DWORDLONG key, DWORD value);
+ CorInfoType repGetTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls);
+
void recGetParentType(CORINFO_CLASS_HANDLE cls, CORINFO_CLASS_HANDLE result);
void dmpGetParentType(DWORDLONG key, DWORDLONG value);
CORINFO_CLASS_HANDLE repGetParentType(CORINFO_CLASS_HANDLE cls);
@@ -1283,7 +1287,7 @@ private:
};
// ********************* Please keep this up-to-date to ease adding more ***************
-// Highest packet number: 167
+// Highest packet number: 168
// *************************************************************************************
enum mcPackets
{
@@ -1400,6 +1404,7 @@ enum mcPackets
Packet_GetTokenTypeAsHandle = 89,
Packet_GetTypeForBox = 90,
Packet_GetTypeForPrimitiveValueClass = 91,
+ Packet_GetTypeForPrimitiveNumericClass = 168, // Added 12/7/2017
Packet_GetUnboxedEntry = 165, // Added 10/26/17
Packet_GetUnBoxHelper = 92,
Packet_GetReadyToRunHelper = 150, // Added 10/10/2014
diff --git a/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp
index b6bc4fa8fb..d63a8acb90 100644
--- a/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp
+++ b/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp
@@ -906,6 +906,16 @@ CorInfoType interceptor_ICJI::getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE
return temp;
}
+// "System.Int32" ==> CORINFO_TYPE_INT..
+// "System.UInt32" ==> CORINFO_TYPE_UINT..
+CorInfoType interceptor_ICJI::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    // Log the call, forward it to original_ICorJitInfo, then record the answer in the method context.
+    mc->cr->AddCall("getTypeForPrimitiveNumericClass");
+    const CorInfoType numericType = original_ICorJitInfo->getTypeForPrimitiveNumericClass(cls);
+    mc->recGetTypeForPrimitiveNumericClass(cls, numericType);
+    return numericType;
+}
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL interceptor_ICJI::canCast(CORINFO_CLASS_HANDLE child, // subtype (extends parent)
diff --git a/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp b/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp
index 91d6b3388d..1b18072f41 100644
--- a/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp
+++ b/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp
@@ -704,6 +704,14 @@ CorInfoType interceptor_ICJI::getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE
return original_ICorJitInfo->getTypeForPrimitiveValueClass(cls);
}
+// "System.Int32" ==> CORINFO_TYPE_INT..
+// "System.UInt32" ==> CORINFO_TYPE_UINT..
+CorInfoType interceptor_ICJI::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    // Register the call with mcs, then pass it through to original_ICorJitInfo unchanged.
+    mcs->AddCall("getTypeForPrimitiveNumericClass");
+    const CorInfoType numericType = original_ICorJitInfo->getTypeForPrimitiveNumericClass(cls);
+    return numericType;
+}
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL interceptor_ICJI::canCast(CORINFO_CLASS_HANDLE child, // subtype (extends parent)
diff --git a/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp b/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp
index d23f727fd9..ac7a6d9f30 100644
--- a/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp
+++ b/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp
@@ -629,6 +629,13 @@ CorInfoType interceptor_ICJI::getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE
return original_ICorJitInfo->getTypeForPrimitiveValueClass(cls);
}
+// "System.Int32" ==> CORINFO_TYPE_INT..
+// "System.UInt32" ==> CORINFO_TYPE_UINT..
+CorInfoType interceptor_ICJI::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    // Pure pass-through to original_ICorJitInfo.
+    const CorInfoType numericType = original_ICorJitInfo->getTypeForPrimitiveNumericClass(cls);
+    return numericType;
+}
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL interceptor_ICJI::canCast(CORINFO_CLASS_HANDLE child, // subtype (extends parent)
diff --git a/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp
index 81ff74db71..852b1147e6 100644
--- a/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp
+++ b/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp
@@ -763,6 +763,14 @@ CorInfoType MyICJI::getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE cls)
return jitInstance->mc->repGetTypeForPrimitiveValueClass(cls);
}
+// "System.Int32" ==> CORINFO_TYPE_INT..
+// "System.UInt32" ==> CORINFO_TYPE_UINT..
+CorInfoType MyICJI::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    // Log the call, then answer it from the method context via repGetTypeForPrimitiveNumericClass.
+    jitInstance->mc->cr->AddCall("getTypeForPrimitiveNumericClass");
+    const CorInfoType numericType = jitInstance->mc->repGetTypeForPrimitiveNumericClass(cls);
+    return numericType;
+}
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL MyICJI::canCast(CORINFO_CLASS_HANDLE child, // subtype (extends parent)
diff --git a/src/inc/corinfo.h b/src/inc/corinfo.h
index 287f44fc92..b32e18714b 100644
--- a/src/inc/corinfo.h
+++ b/src/inc/corinfo.h
@@ -213,13 +213,14 @@ TODO: Talk about initializing strutures before use
#define SELECTANY extern __declspec(selectany)
#endif
-SELECTANY const GUID JITEEVersionIdentifier = { /* 01c3d216-a404-4290-8278-ac27a4793d31 */
- 0x01c3d216,
- 0xa404,
- 0x4290,
- {0x82, 0x78, 0xac, 0x27, 0xa4, 0x79, 0x3d, 0x31}
+SELECTANY const GUID JITEEVersionIdentifier = { /* 19258069-1777-4691-87DF-DADF8F352875 */
+ 0x19258069,
+ 0x1777,
+ 0x4691,
+ { 0x87, 0xdf, 0xda, 0xdf, 0x8f, 0x35, 0x28, 0x75 }
};
+
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// END JITEEVersionIdentifier
@@ -2508,6 +2509,12 @@ public:
CORINFO_CLASS_HANDLE cls
) = 0;
+ // "System.Int32" ==> CORINFO_TYPE_INT..
+ // "System.UInt32" ==> CORINFO_TYPE_UINT..
+ virtual CorInfoType getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE cls
+ ) = 0;
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
virtual BOOL canCast(
diff --git a/src/jit/ICorJitInfo_API_names.h b/src/jit/ICorJitInfo_API_names.h
index 0a8117da59..c9edd544be 100644
--- a/src/jit/ICorJitInfo_API_names.h
+++ b/src/jit/ICorJitInfo_API_names.h
@@ -71,6 +71,7 @@ DEF_CLR_API(initClass)
DEF_CLR_API(classMustBeLoadedBeforeCodeIsRun)
DEF_CLR_API(getBuiltinClass)
DEF_CLR_API(getTypeForPrimitiveValueClass)
+DEF_CLR_API(getTypeForPrimitiveNumericClass)
DEF_CLR_API(canCast)
DEF_CLR_API(areTypesEquivalent)
DEF_CLR_API(mergeClasses)
diff --git a/src/jit/ICorJitInfo_API_wrapper.hpp b/src/jit/ICorJitInfo_API_wrapper.hpp
index f298ea9173..9eaeb5d179 100644
--- a/src/jit/ICorJitInfo_API_wrapper.hpp
+++ b/src/jit/ICorJitInfo_API_wrapper.hpp
@@ -675,6 +675,15 @@ CorInfoType WrapICorJitInfo::getTypeForPrimitiveValueClass(
return temp;
}
+CorInfoType WrapICorJitInfo::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+    // The forwarded call to wrapHnd is bracketed by API_ENTER / API_LEAVE.
+    API_ENTER(getTypeForPrimitiveNumericClass);
+    CorInfoType numericType = wrapHnd->getTypeForPrimitiveNumericClass(cls);
+    API_LEAVE(getTypeForPrimitiveNumericClass);
+    return numericType;
+}
+
BOOL WrapICorJitInfo::canCast(
CORINFO_CLASS_HANDLE child,
CORINFO_CLASS_HANDLE parent )
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index ec3252fd17..5804fa8445 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -114,7 +114,7 @@ void genPutArgStkSIMD12(GenTree* treeNode);
#endif // _TARGET_X86_
#endif // FEATURE_SIMD
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
void genHWIntrinsic(GenTreeHWIntrinsic* node);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index aaebe353b5..347062c422 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -2116,7 +2116,30 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
SIMDVector3Handle = nullptr;
SIMDVector4Handle = nullptr;
SIMDVectorHandle = nullptr;
-#endif
+#if FEATURE_HW_INTRINSICS
+ // Null out every Vector128<T>/Vector256<T> handle declared in compiler.h, the ULong variants included.
+ Vector128FloatHandle = nullptr;
+ Vector128DoubleHandle = nullptr;
+ Vector128IntHandle = nullptr;
+ Vector128UShortHandle = nullptr;
+ Vector128UByteHandle = nullptr;
+ Vector128ShortHandle = nullptr;
+ Vector128ByteHandle = nullptr;
+ Vector128LongHandle = nullptr;
+ Vector128UIntHandle = nullptr;
+ Vector128ULongHandle = nullptr;
+ Vector256FloatHandle = nullptr;
+ Vector256DoubleHandle = nullptr;
+ Vector256IntHandle = nullptr;
+ Vector256UShortHandle = nullptr;
+ Vector256UByteHandle = nullptr;
+ Vector256ShortHandle = nullptr;
+ Vector256ByteHandle = nullptr;
+ Vector256LongHandle = nullptr;
+ Vector256UIntHandle = nullptr;
+ Vector256ULongHandle = nullptr;
+#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
compUsesThrowHelper = false;
}
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index d17211d356..5b47689fe9 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -2068,6 +2068,7 @@ public:
GenTree* op2,
NamedIntrinsic hwIntrinsicID);
GenTree* gtNewMustThrowException(unsigned helper, var_types type);
+ CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType);
#endif // FEATURE_HW_INTRINSICS
GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
@@ -3025,6 +3026,8 @@ protected:
InstructionSet lookupHWIntrinsicISA(const char* className);
NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
+ bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
+#ifdef _TARGET_XARCH_
GenTree* impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
@@ -3041,6 +3044,7 @@ protected:
GenTree* impLZCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
GenTree* impPOPCNTIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig);
+#endif // _TARGET_XARCH_
#endif // FEATURE_HW_INTRINSICS
GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
CORINFO_SIG_INFO* sig,
@@ -7393,6 +7397,29 @@ private:
CORINFO_CLASS_HANDLE SIMDVector4Handle;
CORINFO_CLASS_HANDLE SIMDVectorHandle;
+#if FEATURE_HW_INTRINSICS // class handles returned by gtGetStructHandleForHWSIMD: Vector128* for TYP_SIMD16, Vector256* for TYP_SIMD32
+ CORINFO_CLASS_HANDLE Vector128FloatHandle;
+ CORINFO_CLASS_HANDLE Vector128DoubleHandle;
+ CORINFO_CLASS_HANDLE Vector128IntHandle;
+ CORINFO_CLASS_HANDLE Vector128UShortHandle;
+ CORINFO_CLASS_HANDLE Vector128UByteHandle;
+ CORINFO_CLASS_HANDLE Vector128ShortHandle;
+ CORINFO_CLASS_HANDLE Vector128ByteHandle;
+ CORINFO_CLASS_HANDLE Vector128LongHandle;
+ CORINFO_CLASS_HANDLE Vector128UIntHandle;
+ CORINFO_CLASS_HANDLE Vector128ULongHandle;
+ CORINFO_CLASS_HANDLE Vector256FloatHandle;
+ CORINFO_CLASS_HANDLE Vector256DoubleHandle;
+ CORINFO_CLASS_HANDLE Vector256IntHandle;
+ CORINFO_CLASS_HANDLE Vector256UShortHandle;
+ CORINFO_CLASS_HANDLE Vector256UByteHandle;
+ CORINFO_CLASS_HANDLE Vector256ShortHandle;
+ CORINFO_CLASS_HANDLE Vector256ByteHandle;
+ CORINFO_CLASS_HANDLE Vector256LongHandle;
+ CORINFO_CLASS_HANDLE Vector256UIntHandle;
+ CORINFO_CLASS_HANDLE Vector256ULongHandle;
+#endif // FEATURE_HW_INTRINSICS
+
// Get the handle for a SIMD type.
CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
{
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index b29e65319a..fe46c19886 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -4796,6 +4796,24 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
emitAdjustStackDepthPushPop(ins);
}
+#if FEATURE_HW_INTRINSICS
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_R: emit "reg = reg1 <ins> reg2" for a SIMD instruction.
+//
+// Arguments:
+//    ins      -- the instruction to emit
+//    reg      -- destination register
+//    reg1     -- first source register
+//    reg2     -- second source register
+//    simdtype -- SIMD type; emitTypeSize(simdtype) is used as the operand size
+//
+// Notes:
+//    With VEX encoding and reg1 != reg, one three-operand instruction is emitted.
+//    Otherwise the two-operand form "ins reg, reg2" is used, preceded by a movaps
+//    copy of reg1 into reg when the two differ. That copy would destroy the second
+//    operand if reg2 == reg, so that combination is asserted against.
+//
+void emitter::emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype)
+{
+    if (UseVEXEncoding() && reg1 != reg)
+    {
+        emitIns_R_R_R(ins, emitTypeSize(simdtype), reg, reg1, reg2);
+    }
+    else
+    {
+        if (reg1 != reg)
+        {
+            // movaps overwrites reg; if reg also held the second operand, that value would be lost.
+            assert(reg2 != reg);
+            emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
+        }
+        emitIns_R_R(ins, emitTypeSize(simdtype), reg, reg2);
+    }
+}
+#endif
+
/*****************************************************************************
*
* The following add instructions referencing stack-based local variables.
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index c0ea1c3b69..746c26b954 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -423,6 +423,10 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg,
void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+#if FEATURE_HW_INTRINSICS
+void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype);
+#endif
+
#if FEATURE_STACK_FP_X87
void emitIns_F_F0(instruction ins, unsigned fpreg);
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index a07fe34d88..e418723831 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -17000,7 +17000,13 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
#ifdef FEATURE_SIMD
case GT_SIMD:
structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsSIMD()->gtSIMDBaseType);
+ break;
#endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType);
+ break;
+#endif // FEATURE_HW_INTRINSICS
break; // NOTE(review): both cases above now end in their own break, so this one looks unreachable when either feature is enabled -- confirm against the preceding case before removing
}
}
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 432d279883..f30713bed4 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -1584,6 +1584,15 @@ public:
return OperIsSIMD(gtOper);
}
+#if FEATURE_HW_INTRINSICS
+ inline bool OperIsSimdHWIntrinsic() const;
+#else
+ inline bool OperIsSimdHWIntrinsic() const
+ {
+ return false;
+ }
+#endif
+
// This is here for cleaner GT_LONG #ifdefs.
static bool OperIsLong(genTreeOps gtOper)
{
@@ -4171,7 +4180,7 @@ struct GenTreeJitIntrinsic : public GenTreeOp
{
}
- bool isSIMD()
+ bool isSIMD() const
{
return gtSIMDSize != 0;
}
@@ -4235,6 +4244,17 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
}
#endif
};
+
+// True only for a GT_HWIntrinsic node whose isSIMD() reports true (non-zero gtSIMDSize).
+inline bool GenTree::OperIsSimdHWIntrinsic() const
+{
+    if (gtOper != GT_HWIntrinsic)
+    {
+        return false;
+    }
+
+    // AsHWIntrinsic() is not declared const, so `this` is cast directly instead.
+    return reinterpret_cast<const GenTreeHWIntrinsic*>(this)->isSIMD();
+}
#endif // FEATURE_HW_INTRINSICS
/* gtIndex -- array access */
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 763647e7bf..52c06191df 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -83,12 +83,90 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement SSE intrinsic code generation");
+    // Code generation for SSE intrinsics. Only NI_SSE_Add is handled: addps on TYP_SIMD16 operands.
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA; // set per intrinsic once a second operand is known to exist
+    emitter*  emit   = getEmitter();
+
+    genConsumeOperands(node);
+
+    switch (intrinsicID)
+    {
+        case NI_SSE_Add:
+            assert(baseType == TYP_FLOAT);
+            op2Reg = op2->gtRegNum;
+            emit->emitIns_SIMD_R_R_R(INS_addps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+            break;
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
}
void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement SSE2 intrinsic code generation");
+    // Code generation for SSE2 intrinsics. Only NI_SSE2_Add is handled: a TYP_SIMD16 add picked by base type.
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA; // set per intrinsic once a second operand is known to exist
+    emitter*  emit   = getEmitter();
+
+    genConsumeOperands(node);
+
+    switch (intrinsicID)
+    {
+        case NI_SSE2_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_DOUBLE:
+                    ins = INS_addpd;
+                    break;
+                case TYP_INT:
+                case TYP_UINT:
+                    ins = INS_paddd;
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    ins = INS_paddq;
+                    break;
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                    ins = INS_paddb;
+                    break;
+                case TYP_CHAR:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    ins = INS_paddw;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_SIMD_R_R_R(ins, targetReg, op1Reg, op2Reg, TYP_SIMD16);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
}
void CodeGen::genSSE3Intrinsic(GenTreeHWIntrinsic* node)
@@ -150,12 +228,103 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement AVX intrinsic code generation");
+    // Code generation for AVX intrinsics. Only NI_AVX_Add is handled: addpd/addps on TYP_SIMD32 operands.
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA; // set per intrinsic once a second operand is known to exist
+
+    genConsumeOperands(node);
+
+    emitter* emit = getEmitter();
+    switch (intrinsicID)
+    {
+        case NI_AVX_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_DOUBLE:
+                    ins = INS_addpd;
+                    break;
+                case TYP_FLOAT:
+                    ins = INS_addps;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
}
void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
{
- NYI("Implement AVX2 intrinsic code generation");
+    // Code generation for AVX2 intrinsics. Only NI_AVX2_Add is handled: a TYP_SIMD32 integer add picked by base type.
+    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    regNumber      targetReg   = node->gtRegNum;
+    var_types      baseType    = node->gtSIMDBaseType;
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = REG_NA; // set per intrinsic once a second operand is known to exist
+
+    genConsumeOperands(node);
+
+    emitter* emit = getEmitter();
+    switch (intrinsicID)
+    {
+        case NI_AVX2_Add:
+        {
+            op2Reg = op2->gtRegNum;
+
+            instruction ins;
+            switch (baseType)
+            {
+                case TYP_INT:
+                case TYP_UINT:
+                    ins = INS_paddd;
+                    break;
+                case TYP_LONG:
+                case TYP_ULONG:
+                    ins = INS_paddq;
+                    break;
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                    ins = INS_paddb;
+                    break;
+                case TYP_CHAR:
+                case TYP_SHORT:
+                case TYP_USHORT:
+                    ins = INS_paddw;
+                    break;
+                default:
+                    unreached();
+                    break;
+            }
+
+            emit->emitIns_R_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op2Reg);
+            break;
+        }
+        default:
+            unreached();
+            break;
+    }
+    genProduceReg(node);
}
void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 7db3e5c78f..e834f7a019 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -14,9 +14,11 @@
// Intrinsic ID Function name ISA
// SSE Intrinsics
HARDWARE_INTRINSIC(SSE_IsSupported, "get_IsSupported", SSE)
+HARDWARE_INTRINSIC(SSE_Add, "Add", SSE)
// SSE2 Intrinsics
HARDWARE_INTRINSIC(SSE2_IsSupported, "get_IsSupported", SSE2)
+HARDWARE_INTRINSIC(SSE2_Add, "Add", SSE2)
// SSE3 Intrinsics
HARDWARE_INTRINSIC(SSE3_IsSupported, "get_IsSupported", SSE3)
@@ -33,9 +35,11 @@ HARDWARE_INTRINSIC(SSE42_Crc32, "Crc32",
// AVX Intrinsics
HARDWARE_INTRINSIC(AVX_IsSupported, "get_IsSupported", AVX)
+HARDWARE_INTRINSIC(AVX_Add, "Add", AVX)
// AVX2 Intrinsics
HARDWARE_INTRINSIC(AVX2_IsSupported, "get_IsSupported", AVX2)
+HARDWARE_INTRINSIC(AVX2_Add, "Add", AVX2)
// AES Intrinsics
HARDWARE_INTRINSIC(AES_IsSupported, "get_IsSupported", AES)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 15888c98c8..d76ce77a2f 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -153,6 +153,40 @@ InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
}
//------------------------------------------------------------------------
+// isIntrinsicAnIsSupportedPropertyGetter: check whether an intrinsic id is one of
+//    the per-ISA "get_IsSupported" property getters
+//
+// Arguments:
+//    intrinsic -- id of the intrinsic function.
+//
+// Return Value:
+//    true for the NI_*_IsSupported ids listed below, false for every other id.
+//
+// Notes:
+//    impX86HWIntrinsic uses this to exempt IsSupported queries from its
+//    "unsupported ISA" handling.
+//
+bool Compiler::isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic)
+{
+    bool isGetter = false;
+
+    switch (intrinsic)
+    {
+        case NI_SSE_IsSupported:
+        case NI_SSE2_IsSupported:
+        case NI_SSE3_IsSupported:
+        case NI_SSSE3_IsSupported:
+        case NI_SSE41_IsSupported:
+        case NI_SSE42_IsSupported:
+        case NI_AVX_IsSupported:
+        case NI_AVX2_IsSupported:
+        case NI_AES_IsSupported:
+        case NI_BMI1_IsSupported:
+        case NI_BMI2_IsSupported:
+        case NI_FMA_IsSupported:
+        case NI_LZCNT_IsSupported:
+        case NI_PCLMULQDQ_IsSupported:
+        case NI_POPCNT_IsSupported:
+            isGetter = true;
+            break;
+
+        default:
+            break;
+    }
+
+    return isGetter;
+}
+
+//------------------------------------------------------------------------
// impX86HWIntrinsic: dispatch hardware intrinsics to their own implementation
// function
//
@@ -167,7 +201,12 @@ InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
InstructionSet isa = isaOfHWIntrinsic(intrinsic);
- if (!compSupports(isa) && strcmp("get_IsSupported", getHWIntrinsicName(intrinsic)) != 0)
+ // Will throw PlatformNotSupportedException if
+ // - calling hardware intrinsics on unsupported hardware
+ // - calling SIMD hardware intrinsics with featureSIMD=false
+ if ((!compSupports(isa) || (!featureSIMD && isa != InstructionSet_BMI1 && isa != InstructionSet_BMI2 &&
+ isa != InstructionSet_LZCNT && isa != InstructionSet_POPCNT)) &&
+ !isIntrinsicAnIsSupportedPropertyGetter(intrinsic))
{
for (unsigned i = 0; i < sig->numArgs; i++)
{
@@ -213,28 +252,120 @@ GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
}
}
+//------------------------------------------------------------------------
+// gtGetStructHandleForHWSIMD: map a (SIMD type, base type) pair to a cached class handle
+//
+// Arguments:
+//    simdType     -- TYP_SIMD16 selects the Vector128* handles, TYP_SIMD32 the Vector256* handles
+//    simdBaseType -- element type; TYP_CHAR and TYP_USHORT share the UShort handle
+//
+// Return Value:
+//    The matching handle field, or NO_CLASS_HANDLE when simdType is neither
+//    TYP_SIMD16 nor TYP_SIMD32. An unlisted base type asserts and also yields
+//    NO_CLASS_HANDLE.
+//
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType)
+{
+    const bool is128 = (simdType == TYP_SIMD16);
+    if (!is128 && (simdType != TYP_SIMD32))
+    {
+        return NO_CLASS_HANDLE;
+    }
+
+    switch (simdBaseType)
+    {
+        case TYP_FLOAT:
+            return is128 ? Vector128FloatHandle : Vector256FloatHandle;
+        case TYP_DOUBLE:
+            return is128 ? Vector128DoubleHandle : Vector256DoubleHandle;
+        case TYP_INT:
+            return is128 ? Vector128IntHandle : Vector256IntHandle;
+        case TYP_CHAR:
+        case TYP_USHORT:
+            return is128 ? Vector128UShortHandle : Vector256UShortHandle;
+        case TYP_UBYTE:
+            return is128 ? Vector128UByteHandle : Vector256UByteHandle;
+        case TYP_SHORT:
+            return is128 ? Vector128ShortHandle : Vector256ShortHandle;
+        case TYP_BYTE:
+            return is128 ? Vector128ByteHandle : Vector256ByteHandle;
+        case TYP_LONG:
+            return is128 ? Vector128LongHandle : Vector256LongHandle;
+        case TYP_UINT:
+            return is128 ? Vector128UIntHandle : Vector256UIntHandle;
+        case TYP_ULONG:
+            return is128 ? Vector128ULongHandle : Vector256ULongHandle;
+        default:
+            assert(!"Didn't find a class handle for simdType");
+            break;
+    }
+
+    return NO_CLASS_HANDLE;
+}
+
GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{ // Builds the tree for an SSE intrinsic; returns nullptr for ids not handled below.
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
switch (intrinsic)
{
case NI_SSE_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE)); // constant is false whenever featureSIMD is off
+ break;
+
+ case NI_SSE_Add:
+ assert(sig->numArgs == 2);
+ op2 = impSIMDPopStack(TYP_SIMD16); // operands are popped in reverse: op2 first, then op1
+ op1 = impSIMDPopStack(TYP_SIMD16);
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE_Add, TYP_FLOAT, 16); // base type is fixed to TYP_FLOAT here
+ break;
default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic"); // retNode stays nullptr on this path
+ break;
}
+ return retNode;
}
GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{ // Builds the tree for an SSE2 intrinsic; returns nullptr for ids not handled below.
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
switch (intrinsic)
{
case NI_SSE2_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE2));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE2)); // constant is false whenever featureSIMD is off
+ break;
+
+ case NI_SSE2_Add:
+ assert(sig->numArgs == 2);
+ op2 = impSIMDPopStack(TYP_SIMD16); // operands are popped in reverse: op2 first, then op1
+ op1 = impSIMDPopStack(TYP_SIMD16);
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); // base type comes from the signature's return type class
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_Add, baseType, 16);
+ break;
default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic"); // retNode stays nullptr on this path
+ break;
}
+ return retNode;
}
GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
@@ -242,7 +373,7 @@ GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN
switch (intrinsic)
{
case NI_SSE3_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE3));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE3));
default:
return nullptr;
@@ -254,7 +385,7 @@ GenTree* Compiler::impSSSE3Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
switch (intrinsic)
{
case NI_SSSE3_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSSE3));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSSE3));
default:
return nullptr;
@@ -266,7 +397,7 @@ GenTree* Compiler::impSSE41Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
switch (intrinsic)
{
case NI_SSE41_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_SSE41));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE41));
default:
return nullptr;
@@ -286,7 +417,7 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
switch (intrinsic)
{
case NI_SSE42_IsSupported:
- retNode = gtNewIconNode(compSupports(InstructionSet_SSE42));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_SSE42));
break;
case NI_SSE42_Crc32:
@@ -318,26 +449,56 @@ GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HA
GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ GenTree* retNode = nullptr; // import result for the AVX intrinsic; stays nullptr unless a case below assigns it
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
switch (intrinsic)
{
case NI_AVX_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AVX));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX)); // constant is now also gated on featureSIMD
+ break;
+
+ case NI_AVX_Add:
+ assert(sig->numArgs == 2);
+ op2 = impSIMDPopStack(TYP_SIMD32); // operands are popped in reverse order: op2 first, then op1
+ op1 = impSIMDPopStack(TYP_SIMD32);
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); // element type is derived from the signature's return class
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX_Add, baseType, 32); // 32 is presumably the vector size in bytes (matches TYP_SIMD32) -- confirm
+ break;
default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic"); // NOTE(review): no trailing "\n", unlike most JITDUMP calls in this patch
+ break;
}
+ return retNode; // single exit; nullptr here means the intrinsic was not handled by this function
}
GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
{
+ GenTree* retNode = nullptr; // import result for the AVX2 intrinsic; stays nullptr unless a case below assigns it
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types baseType = TYP_UNKNOWN;
switch (intrinsic)
{
case NI_AVX2_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AVX2));
+ retNode = gtNewIconNode(featureSIMD && compSupports(InstructionSet_AVX2)); // constant is now also gated on featureSIMD
+ break;
+
+ case NI_AVX2_Add:
+ assert(sig->numArgs == 2);
+ op2 = impSIMDPopStack(TYP_SIMD32); // operands are popped in reverse order: op2 first, then op1
+ op1 = impSIMDPopStack(TYP_SIMD32);
+ baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); // element type is derived from the signature's return class
+ retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX2_Add, baseType, 32); // 32 is presumably the vector size in bytes (matches TYP_SIMD32) -- confirm
+ break;
default:
- return nullptr;
+ JITDUMP("Not implemented hardware intrinsic"); // NOTE(review): no trailing "\n", unlike most JITDUMP calls in this patch
+ break;
}
+ return retNode; // single exit; nullptr here means the intrinsic was not handled by this function
}
GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig)
@@ -345,7 +506,7 @@ GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
switch (intrinsic)
{
case NI_AES_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_AES));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_AES));
default:
return nullptr;
@@ -381,7 +542,7 @@ GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND
switch (intrinsic)
{
case NI_FMA_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_FMA));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_FMA));
default:
return nullptr;
@@ -424,7 +585,7 @@ GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHO
switch (intrinsic)
{
case NI_PCLMULQDQ_IsSupported:
- return gtNewIconNode(compSupports(InstructionSet_PCLMULQDQ));
+ return gtNewIconNode(featureSIMD && compSupports(InstructionSet_PCLMULQDQ));
default:
return nullptr;
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 95e98c0cc9..539650891f 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1097,14 +1097,16 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
- (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+ (src->TypeGet() != TYP_STRUCT &&
+ (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
assert(varTypeIsStruct(src));
assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
src->gtOper == GT_COMMA ||
- (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+ (src->TypeGet() != TYP_STRUCT &&
+ (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
if (destAddr->OperGet() == GT_ADDR)
{
@@ -1610,6 +1612,11 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
break;
#endif // FEATURE_SIMD
+#if FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+ break;
+#endif
case GT_COMMA:
{
@@ -1644,6 +1651,14 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
}
else
#endif
+#if FEATURE_HW_INTRINSICS
+ if (blockNode->OperGet() == GT_HWIntrinsic && blockNode->AsHWIntrinsic()->isSIMD())
+ {
+ parent->gtOp.gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
+ alreadyNormalized = true;
+ }
+ else
+#endif
{
noway_assert(blockNode->OperIsBlk());
@@ -3872,7 +3887,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
{
assert(retNode == nullptr);
const NamedIntrinsic ni = lookupNamedIntrinsic(method);
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
if (ni > NI_HW_INTRINSIC_START && ni < NI_HW_INTRINSIC_END)
{
return impX86HWIntrinsic(ni, method, sig);
@@ -4100,7 +4115,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
}
}
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
if ((namespaceName != nullptr) && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0)
{
InstructionSet isa = lookupHWIntrinsicISA(className);
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 970793db95..37388d48f5 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -388,7 +388,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
addr->ClearContained();
}
}
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD() && !source->OperIsSimdHWIntrinsic())
{
assert(source->IsLocal());
MakeSrcContained(blkNode, source);
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 68c587beb2..4265289ac7 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -10175,7 +10175,7 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
needsIndirection = false;
effectiveVal = indirTree->Addr()->gtGetOp1();
}
- if (effectiveVal->OperIsSIMD())
+ if (effectiveVal->OperIsSIMD() || effectiveVal->OperIsSimdHWIntrinsic())
{
needsIndirection = false;
}
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 568a8f7084..6019972050 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -129,236 +129,493 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
return TYP_UNKNOWN;
}
-#if FEATURE_HW_INTRINSICS && DEBUG
- if (isIntrinsicType(typeHnd))
- {
- JITDUMP("\nFound Vector Type: %s with base type %s\n", getClassNameFromMetadata(typeHnd, nullptr),
- getClassNameFromMetadata(getTypeInstantiationArgument(typeHnd, 0), nullptr));
- }
-#endif
-
// fast path search using cached type handles of important types
var_types simdBaseType = TYP_UNKNOWN;
unsigned size = 0;
- // Early return if it is not a SIMD module.
- if (!isSIMDClass(typeHnd))
- {
- return TYP_UNKNOWN;
- }
-
- // The most likely to be used type handles are looked up first followed by
- // less likely to be used type handles
- if (typeHnd == SIMDFloatHandle)
- {
- simdBaseType = TYP_FLOAT;
- JITDUMP(" Known type SIMD Vector<Float>\n");
- }
- else if (typeHnd == SIMDIntHandle)
- {
- simdBaseType = TYP_INT;
- JITDUMP(" Known type SIMD Vector<Int>\n");
- }
- else if (typeHnd == SIMDVector2Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 2 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Known type Vector2\n");
- }
- else if (typeHnd == SIMDVector3Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 3 * genTypeSize(TYP_FLOAT);
- assert(size == info.compCompHnd->getClassSize(typeHnd));
- JITDUMP(" Known type Vector3\n");
- }
- else if (typeHnd == SIMDVector4Handle)
- {
- simdBaseType = TYP_FLOAT;
- size = 4 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Known type Vector4\n");
- }
- else if (typeHnd == SIMDVectorHandle)
- {
- JITDUMP(" Known type Vector\n");
- }
- else if (typeHnd == SIMDUShortHandle)
- {
- simdBaseType = TYP_CHAR;
- JITDUMP(" Known type SIMD Vector<ushort>\n");
- }
- else if (typeHnd == SIMDUByteHandle)
+ // TODO - Optimize SIMD type recognition by IntrinsicAttribute
+ if (isSIMDClass(typeHnd))
{
- simdBaseType = TYP_UBYTE;
- JITDUMP(" Known type SIMD Vector<ubyte>\n");
- }
- else if (typeHnd == SIMDDoubleHandle)
- {
- simdBaseType = TYP_DOUBLE;
- JITDUMP(" Known type SIMD Vector<Double>\n");
- }
- else if (typeHnd == SIMDLongHandle)
- {
- simdBaseType = TYP_LONG;
- JITDUMP(" Known type SIMD Vector<Long>\n");
- }
- else if (typeHnd == SIMDShortHandle)
- {
- simdBaseType = TYP_SHORT;
- JITDUMP(" Known type SIMD Vector<short>\n");
- }
- else if (typeHnd == SIMDByteHandle)
- {
- simdBaseType = TYP_BYTE;
- JITDUMP(" Known type SIMD Vector<byte>\n");
- }
- else if (typeHnd == SIMDUIntHandle)
- {
- simdBaseType = TYP_UINT;
- JITDUMP(" Known type SIMD Vector<uint>\n");
- }
- else if (typeHnd == SIMDULongHandle)
- {
- simdBaseType = TYP_ULONG;
- JITDUMP(" Known type SIMD Vector<ulong>\n");
- }
-
- // slow path search
- if (simdBaseType == TYP_UNKNOWN)
- {
- // Doesn't match with any of the cached type handles.
- // Obtain base type by parsing fully qualified class name.
- //
- // TODO-Throughput: implement product shipping solution to query base type.
- WCHAR className[256] = {0};
- WCHAR* pbuf = &className[0];
- int len = _countof(className);
- info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
- noway_assert(pbuf < &className[256]);
- JITDUMP("SIMD Candidate Type %S\n", className);
+ // The most likely to be used type handles are looked up first followed by
+ // less likely to be used type handles
+ if (typeHnd == SIMDFloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Known type SIMD Vector<Float>\n");
+ }
+ else if (typeHnd == SIMDIntHandle)
+ {
+ simdBaseType = TYP_INT;
+ JITDUMP(" Known type SIMD Vector<Int>\n");
+ }
+ else if (typeHnd == SIMDVector2Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector2\n");
+ }
+ else if (typeHnd == SIMDVector3Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Known type Vector3\n");
+ }
+ else if (typeHnd == SIMDVector4Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector4\n");
+ }
+ else if (typeHnd == SIMDVectorHandle)
+ {
+ JITDUMP(" Known type Vector\n");
+ }
+ else if (typeHnd == SIMDUShortHandle)
+ {
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Known type SIMD Vector<ushort>\n");
+ }
+ else if (typeHnd == SIMDUByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Known type SIMD Vector<ubyte>\n");
+ }
+ else if (typeHnd == SIMDDoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Known type SIMD Vector<Double>\n");
+ }
+ else if (typeHnd == SIMDLongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Known type SIMD Vector<Long>\n");
+ }
+ else if (typeHnd == SIMDShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Known type SIMD Vector<short>\n");
+ }
+ else if (typeHnd == SIMDByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Known type SIMD Vector<byte>\n");
+ }
+ else if (typeHnd == SIMDUIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Known type SIMD Vector<uint>\n");
+ }
+ else if (typeHnd == SIMDULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Known type SIMD Vector<ulong>\n");
+ }
- if (wcsncmp(className, W("System.Numerics."), 16) == 0)
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
{
- if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
+ // Doesn't match with any of the cached type handles.
+ // Obtain base type by parsing fully qualified class name.
+ //
+ // TODO-Throughput: implement product shipping solution to query base type.
+ WCHAR className[256] = {0};
+ WCHAR* pbuf = &className[0];
+ int len = _countof(className);
+ info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
+ noway_assert(pbuf < &className[256]);
+ JITDUMP("SIMD Candidate Type %S\n", className);
+
+ if (wcsncmp(className, W("System.Numerics."), 16) == 0)
{
- if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
- {
- SIMDFloatHandle = typeHnd;
- simdBaseType = TYP_FLOAT;
- JITDUMP(" Found type SIMD Vector<Float>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
- {
- SIMDIntHandle = typeHnd;
- simdBaseType = TYP_INT;
- JITDUMP(" Found type SIMD Vector<Int>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
{
- SIMDUShortHandle = typeHnd;
- simdBaseType = TYP_CHAR;
- JITDUMP(" Found type SIMD Vector<ushort>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
- {
- SIMDUByteHandle = typeHnd;
- simdBaseType = TYP_UBYTE;
- JITDUMP(" Found type SIMD Vector<ubyte>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
- {
- SIMDDoubleHandle = typeHnd;
- simdBaseType = TYP_DOUBLE;
- JITDUMP(" Found type SIMD Vector<Double>\n");
- }
- else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
- {
- SIMDLongHandle = typeHnd;
- simdBaseType = TYP_LONG;
- JITDUMP(" Found type SIMD Vector<Long>\n");
+ if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
+ {
+ SIMDFloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type SIMD Vector<Float>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
+ {
+ SIMDIntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type SIMD Vector<Int>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ {
+ SIMDUShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Found type SIMD Vector<ushort>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
+ {
+ SIMDUByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type SIMD Vector<ubyte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
+ {
+ SIMDDoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type SIMD Vector<Double>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
+ {
+ SIMDLongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type SIMD Vector<Long>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ {
+ SIMDShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type SIMD Vector<short>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ {
+ SIMDByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type SIMD Vector<byte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ {
+ SIMDUIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type SIMD Vector<uint>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ {
+ SIMDULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type SIMD Vector<ulong>\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Vector<T>\n");
+ }
}
- else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
{
- SIMDShortHandle = typeHnd;
- simdBaseType = TYP_SHORT;
- JITDUMP(" Found type SIMD Vector<short>\n");
+ SIMDVector2Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector2\n");
}
- else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
{
- SIMDByteHandle = typeHnd;
- simdBaseType = TYP_BYTE;
- JITDUMP(" Found type SIMD Vector<byte>\n");
+ SIMDVector3Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Found Vector3\n");
}
- else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
{
- SIMDUIntHandle = typeHnd;
- simdBaseType = TYP_UINT;
- JITDUMP(" Found type SIMD Vector<uint>\n");
+ SIMDVector4Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector4\n");
}
- else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
{
- SIMDULongHandle = typeHnd;
- simdBaseType = TYP_ULONG;
- JITDUMP(" Found type SIMD Vector<ulong>\n");
+ SIMDVectorHandle = typeHnd;
+ JITDUMP(" Found type Vector\n");
}
else
{
- JITDUMP(" Unknown SIMD Vector<T>\n");
+ JITDUMP(" Unknown SIMD Type\n");
}
}
- else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
+ }
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
+ {
+ // If not a fixed size vector then its size is same as SIMD vector
+ // register length in bytes
+ if (size == 0)
{
- SIMDVector2Handle = typeHnd;
-
- simdBaseType = TYP_FLOAT;
- size = 2 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Found Vector2\n");
+ size = getSIMDVectorRegisterByteLength();
}
- else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
- {
- SIMDVector3Handle = typeHnd;
- simdBaseType = TYP_FLOAT;
- size = 3 * genTypeSize(TYP_FLOAT);
- assert(size == info.compCompHnd->getClassSize(typeHnd));
- JITDUMP(" Found Vector3\n");
- }
- else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
- {
- SIMDVector4Handle = typeHnd;
+ *sizeBytes = size;
+ setUsesSIMDTypes(true);
+ }
+ }
+#if FEATURE_HW_INTRINSICS
+ else if (isIntrinsicType(typeHnd))
+ {
+ if (typeHnd == Vector256FloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<float>\n");
+ }
+ else if (typeHnd == Vector256DoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<double>\n");
+ }
+ else if (typeHnd == Vector256IntHandle)
+ {
+ simdBaseType = TYP_INT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<int>\n");
+ }
+ else if (typeHnd == Vector256UIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<uint>\n");
+ }
+ else if (typeHnd == Vector256ShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<short>\n");
+ }
+ else if (typeHnd == Vector256UShortHandle)
+ {
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<ushort>\n");
+ }
+ else if (typeHnd == Vector256ByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<sbyte>\n");
+ }
+ else if (typeHnd == Vector256UByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<byte>\n");
+ }
+ else if (typeHnd == Vector256LongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<long>\n");
+ }
+ else if (typeHnd == Vector256ULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ size = YMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector256<ulong>\n");
+ }
+ else if (typeHnd == Vector128FloatHandle) // first of the Vector128<T> fast-path checks; handle is cached by the slow path below
+ {
+ simdBaseType = TYP_FLOAT;
+ size = XMM_REGSIZE_BYTES; // 128-bit vector, same size constant as the other Vector128 branches
+ JITDUMP(" Known type Vector128<float>\n");
+ }
+ else if (typeHnd == Vector128DoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<double>\n");
+ }
+ else if (typeHnd == Vector128IntHandle)
+ {
+ simdBaseType = TYP_INT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<int>\n");
+ }
+ else if (typeHnd == Vector128UIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<uint>\n");
+ }
+ else if (typeHnd == Vector128ShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<short>\n");
+ }
+ else if (typeHnd == Vector128UShortHandle)
+ {
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<ushort>\n");
+ }
+ else if (typeHnd == Vector128ByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<sbyte>\n");
+ }
+ else if (typeHnd == Vector128UByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<byte>\n");
+ }
+ else if (typeHnd == Vector128LongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<long>\n");
+ }
+ else if (typeHnd == Vector128ULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ size = XMM_REGSIZE_BYTES;
+ JITDUMP(" Known type Vector128<ulong>\n");
+ }
- simdBaseType = TYP_FLOAT;
- size = 4 * genTypeSize(TYP_FLOAT);
- assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
- JITDUMP(" Found Vector4\n");
- }
- else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
- {
- SIMDVectorHandle = typeHnd;
- JITDUMP(" Found type Vector\n");
- }
- else
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
+ {
+ // Doesn't match with any of the cached type handles.
+ const char* className = getClassNameFromMetadata(typeHnd, nullptr);
+ CORINFO_CLASS_HANDLE baseTypeHnd = getTypeInstantiationArgument(typeHnd, 0);
+
+ if (baseTypeHnd != nullptr)
{
- JITDUMP(" Unknown SIMD Type\n");
+ CorInfoType type = info.compCompHnd->getTypeForPrimitiveNumericClass(baseTypeHnd);
+
+ JITDUMP("HW Intrinsic SIMD Candidate Type %s with Base Type %s\n", className,
+ getClassNameFromMetadata(baseTypeHnd, nullptr));
+
+ if (strcmp(className, "Vector256`1") == 0)
+ {
+ size = YMM_REGSIZE_BYTES;
+ switch (type)
+ {
+ case CORINFO_TYPE_FLOAT:
+ Vector256FloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<float>\n");
+ break;
+ case CORINFO_TYPE_DOUBLE:
+ Vector256DoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<double>\n");
+ break;
+ case CORINFO_TYPE_INT:
+ Vector256IntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<int>\n");
+ break;
+ case CORINFO_TYPE_UINT:
+ Vector256UIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<uint>\n");
+ break;
+ case CORINFO_TYPE_SHORT:
+ Vector256ShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<short>\n");
+ break;
+ case CORINFO_TYPE_USHORT:
+ Vector256UShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<ushort>\n");
+ break;
+ case CORINFO_TYPE_LONG:
+ Vector256LongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<long>\n");
+ break;
+ case CORINFO_TYPE_ULONG:
+ Vector256ULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<ulong>\n");
+ break;
+ case CORINFO_TYPE_UBYTE:
+ Vector256UByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<byte>\n");
+ break;
+ case CORINFO_TYPE_BYTE:
+ Vector256ByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector256<sbyte>\n");
+ break;
+
+ default:
+ JITDUMP(" Unknown Hardware Intrinsic SIMD Type Vector256<T>\n");
+ }
+ }
+ else if (strcmp(className, "Vector128`1") == 0)
+ {
+ size = XMM_REGSIZE_BYTES;
+ switch (type)
+ {
+ case CORINFO_TYPE_FLOAT:
+ Vector128FloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<float>\n");
+ break;
+ case CORINFO_TYPE_DOUBLE:
+ Vector128DoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<double>\n");
+ break;
+ case CORINFO_TYPE_INT:
+ Vector128IntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<int>\n");
+ break;
+ case CORINFO_TYPE_UINT:
+ Vector128UIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<uint>\n");
+ break;
+ case CORINFO_TYPE_SHORT:
+ Vector128ShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<short>\n");
+ break;
+ case CORINFO_TYPE_USHORT:
+ Vector128UShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR; // TODO TYP_USHORT;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<ushort>\n");
+ break;
+ case CORINFO_TYPE_LONG:
+ Vector128LongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<long>\n");
+ break;
+ case CORINFO_TYPE_ULONG:
+ Vector128ULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<ulong>\n");
+ break;
+ case CORINFO_TYPE_UBYTE:
+ Vector128UByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<byte>\n");
+ break;
+ case CORINFO_TYPE_BYTE:
+ Vector128ByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type Hardware Intrinsic SIMD Vector128<sbyte>\n");
+ break;
+
+ default:
+ JITDUMP(" Unknown Hardware Intrinsic SIMD Type Vector128<T>\n");
+ }
+ }
}
}
- }
- if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
- {
- // If not a fixed size vector then its size is same as SIMD vector
- // register length in bytes
- if (size == 0)
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
{
- size = getSIMDVectorRegisterByteLength();
+ *sizeBytes = size;
+ setUsesSIMDTypes(true);
}
-
- *sizeBytes = size;
- setUsesSIMDTypes(true);
}
+#endif // FEATURE_HW_INTRINSICS
return simdBaseType;
}
diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp
index ccddf9830e..9bd710fb0b 100644
--- a/src/vm/jitinterface.cpp
+++ b/src/vm/jitinterface.cpp
@@ -4503,6 +4503,49 @@ CorInfoType CEEInfo::getTypeForPrimitiveValueClass(
return result;
}
+/*********************************************************************/
+CorInfoType CEEInfo::getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE clsHnd)
+{
+ CONTRACTL {
+ SO_TOLERANT;
+ THROWS;
+ GC_TRIGGERS;
+ MODE_PREEMPTIVE;
+ } CONTRACTL_END;
+
+ CorInfoType result = CORINFO_TYPE_UNDEF; // returned unchanged when the class is not one of the element types listed below
+
+ JIT_TO_EE_TRANSITION_LEAF();
+
+ TypeHandle th(clsHnd);
+ _ASSERTE (!th.IsGenericVariable());
+
+ CorElementType ty = th.GetSignatureCorElementType(); // the signature element type of the handle selects the result
+ switch (ty)
+ {
+ case ELEMENT_TYPE_I1:
+ case ELEMENT_TYPE_U1:
+ case ELEMENT_TYPE_I2:
+ case ELEMENT_TYPE_U2:
+ case ELEMENT_TYPE_I4:
+ case ELEMENT_TYPE_U4:
+ case ELEMENT_TYPE_I8:
+ case ELEMENT_TYPE_U8:
+ case ELEMENT_TYPE_R4:
+ case ELEMENT_TYPE_R8:
+ result = asCorInfoType(ty); // only these ten I*/U*/R* element types are converted
+ break;
+
+ default:
+ break; // every other element type keeps CORINFO_TYPE_UNDEF
+ }
+
+ JIT_TO_EE_TRANSITION_LEAF(); // NOTE(review): repeats the entry macro used above; an exit-side counterpart may have been intended -- confirm
+
+ return result;
+}
+
void CEEInfo::getGSCookie(GSCookie * pCookieVal, GSCookie ** ppCookieVal)
{
diff --git a/src/vm/jitinterface.h b/src/vm/jitinterface.h
index 71872d31e9..74397c8d01 100644
--- a/src/vm/jitinterface.h
+++ b/src/vm/jitinterface.h
@@ -551,6 +551,12 @@ public:
CORINFO_CLASS_HANDLE cls
);
+ // "System.Int32" ==> CORINFO_TYPE_INT..
+ // "System.UInt32" ==> CORINFO_TYPE_UINT..
+ CorInfoType getTypeForPrimitiveNumericClass(
+ CORINFO_CLASS_HANDLE cls
+ );
+
// TRUE if child is a subtype of parent
// if parent is an interface, then does child implement / extend parent
BOOL canCast(
diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp
index 215f4a7360..02edde0f17 100644
--- a/src/zap/zapinfo.cpp
+++ b/src/zap/zapinfo.cpp
@@ -3106,6 +3106,11 @@ CorInfoType ZapInfo::getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE cls)
return m_pEEJitInfo->getTypeForPrimitiveValueClass(cls);
}
+CorInfoType ZapInfo::getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls)
+{
+ return m_pEEJitInfo->getTypeForPrimitiveNumericClass(cls); // forwards the query unchanged to m_pEEJitInfo and returns its answer
+}
+
BOOL ZapInfo::canCast(CORINFO_CLASS_HANDLE child,
CORINFO_CLASS_HANDLE parent)
{
diff --git a/src/zap/zapinfo.h b/src/zap/zapinfo.h
index 621ffbdc30..0e2bf9dcaf 100644
--- a/src/zap/zapinfo.h
+++ b/src/zap/zapinfo.h
@@ -573,6 +573,7 @@ public:
CORINFO_METHOD_HANDLE mapMethodDeclToMethodImpl(CORINFO_METHOD_HANDLE methHnd);
CORINFO_CLASS_HANDLE getBuiltinClass(CorInfoClassId classId);
CorInfoType getTypeForPrimitiveValueClass(CORINFO_CLASS_HANDLE cls);
+ CorInfoType getTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls);
BOOL canCast(CORINFO_CLASS_HANDLE child, CORINFO_CLASS_HANDLE parent);
BOOL areTypesEquivalent(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2);
TypeCompareState compareTypesForCast(CORINFO_CLASS_HANDLE fromClass, CORINFO_CLASS_HANDLE toClass);
diff --git a/tests/src/JIT/HardwareIntrinsics/Add.cs b/tests/src/JIT/HardwareIntrinsics/Add.cs
new file mode 100644
index 0000000000..cf39254c74
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/Add.cs
@@ -0,0 +1,428 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program
+ {
+ const int Pass = 100;
+ const int Fail = 0;
+
+ /// <summary>
+ /// Exercises the Add intrinsic of every ISA the current machine reports as supported
+ /// (AVX, AVX2, SSE/SSE2) for each element type, and compares every output lane
+ /// against the scalar sum of the corresponding input lanes.
+ /// </summary>
+ /// <param name="args">Unused.</param>
+ /// <returns><c>Pass</c> (100) when every check succeeds, otherwise <c>Fail</c> (0).</returns>
+ static unsafe int Main(string[] args)
+ {
+     // Every check is always executed (non-short-circuit &=) so that all failures are reported.
+     bool allPassed = true;
+
+     if (Avx.IsSupported)
+     {
+         using (TestTable<float> floatTable = new TestTable<float>(new float[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new float[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new float[8]))
+         using (TestTable<double> doubleTable = new TestTable<double>(new double[4] { 1, -5, 100, 0 }, new double[4] { 22, -1, -50, 0 }, new double[4]))
+         {
+             var vf1 = Unsafe.Read<Vector256<float>>(floatTable.inArray1Ptr);
+             var vf2 = Unsafe.Read<Vector256<float>>(floatTable.inArray2Ptr);
+             var vf3 = Avx.Add(vf1, vf2);
+             Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+             var vd1 = Unsafe.Read<Vector256<double>>(doubleTable.inArray1Ptr);
+             var vd2 = Unsafe.Read<Vector256<double>>(doubleTable.inArray2Ptr);
+             var vd3 = Avx.Add(vd1, vd2);
+             Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+             allPassed &= CheckAndReport(floatTable, (x, y, z) => x + y == z, "AVX Add failed on float:");
+             allPassed &= CheckAndReport(doubleTable, (x, y, z) => x + y == z, "AVX Add failed on double:");
+         }
+     }
+
+     if (Avx2.IsSupported)
+     {
+         using (TestTable<int> intTable = new TestTable<int>(new int[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new int[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new int[8]))
+         using (TestTable<long> longTable = new TestTable<long>(new long[4] { 1, -5, 100, 0 }, new long[4] { 22, -1, -50, 0 }, new long[4]))
+         using (TestTable<uint> uintTable = new TestTable<uint>(new uint[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new uint[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new uint[8]))
+         using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[4] { 1, 5, 100, 0 }, new ulong[4] { 22, 1, 50, 0 }, new ulong[4]))
+         using (TestTable<short> shortTable = new TestTable<short>(new short[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new short[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new short[16]))
+         using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[16]))
+         using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[32] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[32] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[32]))
+         using (TestTable<byte> byteTable = new TestTable<byte>(new byte[32] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[32] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[32]))
+         {
+             var vi1 = Unsafe.Read<Vector256<int>>(intTable.inArray1Ptr);
+             var vi2 = Unsafe.Read<Vector256<int>>(intTable.inArray2Ptr);
+             var vi3 = Avx2.Add(vi1, vi2);
+             Unsafe.Write(intTable.outArrayPtr, vi3);
+
+             var vl1 = Unsafe.Read<Vector256<long>>(longTable.inArray1Ptr);
+             var vl2 = Unsafe.Read<Vector256<long>>(longTable.inArray2Ptr);
+             var vl3 = Avx2.Add(vl1, vl2);
+             Unsafe.Write(longTable.outArrayPtr, vl3);
+
+             var vui1 = Unsafe.Read<Vector256<uint>>(uintTable.inArray1Ptr);
+             var vui2 = Unsafe.Read<Vector256<uint>>(uintTable.inArray2Ptr);
+             var vui3 = Avx2.Add(vui1, vui2);
+             Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+             var vul1 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray1Ptr);
+             var vul2 = Unsafe.Read<Vector256<ulong>>(ulongTable.inArray2Ptr);
+             var vul3 = Avx2.Add(vul1, vul2);
+             Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+             var vs1 = Unsafe.Read<Vector256<short>>(shortTable.inArray1Ptr);
+             var vs2 = Unsafe.Read<Vector256<short>>(shortTable.inArray2Ptr);
+             var vs3 = Avx2.Add(vs1, vs2);
+             Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+             var vus1 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray1Ptr);
+             var vus2 = Unsafe.Read<Vector256<ushort>>(ushortTable.inArray2Ptr);
+             var vus3 = Avx2.Add(vus1, vus2);
+             Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+             var vsb1 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray1Ptr);
+             var vsb2 = Unsafe.Read<Vector256<sbyte>>(sbyteTable.inArray2Ptr);
+             var vsb3 = Avx2.Add(vsb1, vsb2);
+             Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+             var vb1 = Unsafe.Read<Vector256<byte>>(byteTable.inArray1Ptr);
+             var vb2 = Unsafe.Read<Vector256<byte>>(byteTable.inArray2Ptr);
+             var vb3 = Avx2.Add(vb1, vb2);
+             Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+             // For element types narrower than int, C# promotes x + y to int; the sum is
+             // narrowed back to the element type so the expected value wraps the same way
+             // a fixed-width lane does.
+             allPassed &= CheckAndReport(intTable, (x, y, z) => x + y == z, "AVX2 Add failed on int:");
+             allPassed &= CheckAndReport(longTable, (x, y, z) => x + y == z, "AVX2 Add failed on long:");
+             allPassed &= CheckAndReport(uintTable, (x, y, z) => x + y == z, "AVX2 Add failed on uint:");
+             allPassed &= CheckAndReport(ulongTable, (x, y, z) => x + y == z, "AVX2 Add failed on ulong:");
+             allPassed &= CheckAndReport(shortTable, (x, y, z) => unchecked((short)(x + y)) == z, "AVX2 Add failed on short:");
+             allPassed &= CheckAndReport(ushortTable, (x, y, z) => unchecked((ushort)(x + y)) == z, "AVX2 Add failed on ushort:");
+             allPassed &= CheckAndReport(sbyteTable, (x, y, z) => unchecked((sbyte)(x + y)) == z, "AVX2 Add failed on sbyte:");
+             allPassed &= CheckAndReport(byteTable, (x, y, z) => unchecked((byte)(x + y)) == z, "AVX2 Add failed on byte:");
+         }
+     }
+
+     // The float case uses Sse.Add but shares the Sse2 guard with the other element types.
+     if (Sse2.IsSupported)
+     {
+         using (TestTable<float> floatTable = new TestTable<float>(new float[4] { 1, -5, 100, 0 }, new float[4] { 22, -1, -50, 0 }, new float[4]))
+         using (TestTable<double> doubleTable = new TestTable<double>(new double[2] { 1, -5 }, new double[2] { 22, -1 }, new double[2]))
+         using (TestTable<int> intTable = new TestTable<int>(new int[4] { 1, -5, 100, 0 }, new int[4] { 22, -1, -50, 0 }, new int[4]))
+         using (TestTable<long> longTable = new TestTable<long>(new long[2] { 1, -5 }, new long[2] { 22, -1 }, new long[2]))
+         using (TestTable<uint> uintTable = new TestTable<uint>(new uint[4] { 1, 5, 100, 0 }, new uint[4] { 22, 1, 50, 0 }, new uint[4]))
+         using (TestTable<ulong> ulongTable = new TestTable<ulong>(new ulong[2] { 1, 5 }, new ulong[2] { 22, 1 }, new ulong[2]))
+         using (TestTable<short> shortTable = new TestTable<short>(new short[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new short[8] { 22, -1, -50, 0, 22, -1, -50, 0 }, new short[8]))
+         using (TestTable<ushort> ushortTable = new TestTable<ushort>(new ushort[8] { 1, 5, 100, 0, 1, 5, 100, 0 }, new ushort[8] { 22, 1, 50, 0, 22, 1, 50, 0 }, new ushort[8]))
+         using (TestTable<sbyte> sbyteTable = new TestTable<sbyte>(new sbyte[16] { 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0, 1, -5, 100, 0 }, new sbyte[16] { 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0, 22, -1, -50, 0 }, new sbyte[16]))
+         using (TestTable<byte> byteTable = new TestTable<byte>(new byte[16] { 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0, 1, 5, 100, 0 }, new byte[16] { 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0, 22, 1, 50, 0 }, new byte[16]))
+         {
+             var vf1 = Unsafe.Read<Vector128<float>>(floatTable.inArray1Ptr);
+             var vf2 = Unsafe.Read<Vector128<float>>(floatTable.inArray2Ptr);
+             var vf3 = Sse.Add(vf1, vf2);
+             Unsafe.Write(floatTable.outArrayPtr, vf3);
+
+             var vd1 = Unsafe.Read<Vector128<double>>(doubleTable.inArray1Ptr);
+             var vd2 = Unsafe.Read<Vector128<double>>(doubleTable.inArray2Ptr);
+             var vd3 = Sse2.Add(vd1, vd2);
+             Unsafe.Write(doubleTable.outArrayPtr, vd3);
+
+             var vi1 = Unsafe.Read<Vector128<int>>(intTable.inArray1Ptr);
+             var vi2 = Unsafe.Read<Vector128<int>>(intTable.inArray2Ptr);
+             var vi3 = Sse2.Add(vi1, vi2);
+             Unsafe.Write(intTable.outArrayPtr, vi3);
+
+             var vl1 = Unsafe.Read<Vector128<long>>(longTable.inArray1Ptr);
+             var vl2 = Unsafe.Read<Vector128<long>>(longTable.inArray2Ptr);
+             var vl3 = Sse2.Add(vl1, vl2);
+             Unsafe.Write(longTable.outArrayPtr, vl3);
+
+             var vui1 = Unsafe.Read<Vector128<uint>>(uintTable.inArray1Ptr);
+             var vui2 = Unsafe.Read<Vector128<uint>>(uintTable.inArray2Ptr);
+             var vui3 = Sse2.Add(vui1, vui2);
+             Unsafe.Write(uintTable.outArrayPtr, vui3);
+
+             var vul1 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray1Ptr);
+             var vul2 = Unsafe.Read<Vector128<ulong>>(ulongTable.inArray2Ptr);
+             var vul3 = Sse2.Add(vul1, vul2);
+             Unsafe.Write(ulongTable.outArrayPtr, vul3);
+
+             var vs1 = Unsafe.Read<Vector128<short>>(shortTable.inArray1Ptr);
+             var vs2 = Unsafe.Read<Vector128<short>>(shortTable.inArray2Ptr);
+             var vs3 = Sse2.Add(vs1, vs2);
+             Unsafe.Write(shortTable.outArrayPtr, vs3);
+
+             var vus1 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray1Ptr);
+             var vus2 = Unsafe.Read<Vector128<ushort>>(ushortTable.inArray2Ptr);
+             var vus3 = Sse2.Add(vus1, vus2);
+             Unsafe.Write(ushortTable.outArrayPtr, vus3);
+
+             var vsb1 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray1Ptr);
+             var vsb2 = Unsafe.Read<Vector128<sbyte>>(sbyteTable.inArray2Ptr);
+             var vsb3 = Sse2.Add(vsb1, vsb2);
+             Unsafe.Write(sbyteTable.outArrayPtr, vsb3);
+
+             var vb1 = Unsafe.Read<Vector128<byte>>(byteTable.inArray1Ptr);
+             var vb2 = Unsafe.Read<Vector128<byte>>(byteTable.inArray2Ptr);
+             var vb3 = Sse2.Add(vb1, vb2);
+             Unsafe.Write(byteTable.outArrayPtr, vb3);
+
+             allPassed &= CheckAndReport(intTable, (x, y, z) => x + y == z, "SSE2 Add failed on int:");
+             allPassed &= CheckAndReport(longTable, (x, y, z) => x + y == z, "SSE2 Add failed on long:");
+             allPassed &= CheckAndReport(uintTable, (x, y, z) => x + y == z, "SSE2 Add failed on uint:");
+             allPassed &= CheckAndReport(ulongTable, (x, y, z) => x + y == z, "SSE2 Add failed on ulong:");
+             allPassed &= CheckAndReport(shortTable, (x, y, z) => unchecked((short)(x + y)) == z, "SSE2 Add failed on short:");
+             allPassed &= CheckAndReport(ushortTable, (x, y, z) => unchecked((ushort)(x + y)) == z, "SSE2 Add failed on ushort:");
+             allPassed &= CheckAndReport(floatTable, (x, y, z) => x + y == z, "SSE Add failed on float:");
+             allPassed &= CheckAndReport(doubleTable, (x, y, z) => x + y == z, "SSE2 Add failed on double:");
+             allPassed &= CheckAndReport(sbyteTable, (x, y, z) => unchecked((sbyte)(x + y)) == z, "SSE2 Add failed on sbyte:");
+             allPassed &= CheckAndReport(byteTable, (x, y, z) => unchecked((byte)(x + y)) == z, "SSE2 Add failed on byte:");
+         }
+     }
+
+     return allPassed ? Pass : Fail;
+ }
+
+ /// <summary>
+ /// Applies <paramref name="check"/> to every (input1, input2, output) triple of
+ /// <paramref name="table"/>. On a mismatch, prints <paramref name="failureMessage"/>
+ /// followed by the whole output array on one line.
+ /// </summary>
+ /// <returns>true when every element satisfied the check; false otherwise.</returns>
+ private static bool CheckAndReport<T>(TestTable<T> table, Func<T, T, T, bool> check, string failureMessage) where T : struct
+ {
+     if (table.CheckResult(check))
+     {
+         return true;
+     }
+
+     Console.WriteLine(failureMessage);
+     foreach (var item in table.outArray)
+     {
+         Console.Write(item + ", ");
+     }
+     Console.WriteLine();
+     return false;
+ }
+
+ /// <summary>
+ /// Bundles two input arrays and one output array and keeps all three pinned
+ /// (GCHandleType.Pinned) so that their raw addresses can be handed out as pointers.
+ /// The pins are released by <see cref="Dispose"/>.
+ /// </summary>
+ public unsafe struct TestTable<T> : IDisposable where T : struct
+ {
+     public T[] inArray1;
+     public T[] inArray2;
+     public T[] outArray;
+
+     // Raw addresses of the pinned arrays, obtained from the pinning handles.
+     // They must not be used after Dispose() has released the handles.
+     public void* inArray1Ptr => inHandle1.AddrOfPinnedObject().ToPointer();
+     public void* inArray2Ptr => inHandle2.AddrOfPinnedObject().ToPointer();
+     public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer();
+
+     GCHandle inHandle1;
+     GCHandle inHandle2;
+     GCHandle outHandle;
+
+     /// <summary>Stores the three arrays and pins each of them.</summary>
+     /// <param name="a">First input array.</param>
+     /// <param name="b">Second input array.</param>
+     /// <param name="c">Output array.</param>
+     public TestTable(T[] a, T[] b, T[] c)
+     {
+         this.inArray1 = a;
+         this.inArray2 = b;
+         this.outArray = c;
+
+         inHandle1 = GCHandle.Alloc(inArray1, GCHandleType.Pinned);
+         inHandle2 = GCHandle.Alloc(inArray2, GCHandleType.Pinned);
+         outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
+     }
+
+     /// <summary>
+     /// Evaluates <paramref name="check"/> on (inArray1[i], inArray2[i], outArray[i]) for
+     /// every index of inArray1, stopping at the first failure.
+     /// </summary>
+     /// <returns>true if the predicate held for every index; false otherwise.</returns>
+     public bool CheckResult(Func<T, T, T, bool> check)
+     {
+         for (int i = 0; i < inArray1.Length; i++)
+         {
+             if (!check(inArray1[i], inArray2[i], outArray[i]))
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     /// <summary>
+     /// Releases the three pinning handles. Safe to call more than once and on a
+     /// default-initialized table: GCHandle.Free() throws on an unallocated handle,
+     /// so each handle is released only while it is still allocated.
+     /// </summary>
+     public void Dispose()
+     {
+         if (inHandle1.IsAllocated)
+         {
+             inHandle1.Free();
+         }
+         if (inHandle2.IsAllocated)
+         {
+             inHandle2.Free();
+         }
+         if (outHandle.IsAllocated)
+         {
+             outHandle.Free();
+         }
+     }
+ }
+
+ }
+} \ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/Add_r.csproj b/tests/src/JIT/HardwareIntrinsics/Add_r.csproj
new file mode 100644
index 0000000000..6179540b29
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/Add_r.csproj
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize></Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Add.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project> \ No newline at end of file
diff --git a/tests/src/JIT/HardwareIntrinsics/Add_ro.csproj b/tests/src/JIT/HardwareIntrinsics/Add_ro.csproj
new file mode 100644
index 0000000000..7c5ee7ce34
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/Add_ro.csproj
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Add.cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project> \ No newline at end of file