summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Strehovský <MichalStrehovsky@users.noreply.github.com>2019-05-30 09:48:07 +0200
committerGitHub <noreply@github.com>2019-05-30 09:48:07 +0200
commitd4fadf03b9964ba9a1bf96f5a2136c2c8f4f38fe (patch)
treeef4d2adbaad884dca16c2ad3b56b69be83d7df63
parent5d16005b984b9b27ce59245bca87aa225b2ca0b7 (diff)
downloadcoreclr-d4fadf03b9964ba9a1bf96f5a2136c2c8f4f38fe.tar.gz
coreclr-d4fadf03b9964ba9a1bf96f5a2136c2c8f4f38fe.tar.bz2
coreclr-d4fadf03b9964ba9a1bf96f5a2136c2c8f4f38fe.zip
Allow generating HW intrinsics in crossgen (#24689)
We currently don't precompile methods that use hardware intrinsics because we don't know the CPU that the generated code will run on. Jitting these methods slows down startup and accounts for 3% of startup time in PowerShell. With this change, we're going to lift this restriction for CoreLib (the thing that matters for startup) and support generating HW intrinsics for our minimum supported target ISA (SSE/SSE2).
-rw-r--r--src/vm/methodtablebuilder.cpp32
-rw-r--r--src/zap/zapper.cpp9
2 files changed, 32 insertions, 9 deletions
diff --git a/src/vm/methodtablebuilder.cpp b/src/vm/methodtablebuilder.cpp
index e1075cb035..31c4b0a5ee 100644
--- a/src/vm/methodtablebuilder.cpp
+++ b/src/vm/methodtablebuilder.cpp
@@ -22,6 +22,7 @@
#include "ecmakey.h"
#include "customattribute.h"
#include "typestring.h"
+#include "compile.h"
//*******************************************************************************
// Helper functions to sort GCdescs by offset (descending order)
@@ -1517,12 +1518,20 @@ MethodTableBuilder::BuildMethodTableThrowing(
if (hr == S_OK && (strcmp(nameSpace, "System.Runtime.Intrinsics.X86") == 0))
#endif
{
- if (IsCompilationProcess())
+#if defined(CROSSGEN_COMPILE)
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ if ((!IsNgenPDBCompilationProcess()
+ && GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule())
+ || (strcmp(className, "Sse") != 0 && strcmp(className, "Sse2") != 0))
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
{
- // Disable AOT compiling for managed implementation of hardware intrinsics in mscorlib.
+ // Disable AOT compiling for managed implementation of hardware intrinsics.
// We specially treat them here to ensure correct ISA features are set during compilation
+ // The only exception to this rule are SSE and SSE2 intrinsics in CoreLib - we can
+ // safely expand those because we require them to be always available.
COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED);
}
+#endif // defined(CROSSGEN_COMPILE)
bmtProp->fIsHardwareIntrinsic = true;
}
}
@@ -9546,16 +9555,21 @@ void MethodTableBuilder::CheckForSystemTypes()
// These __m128 and __m256 types, among other requirements, are special in that they must always
// be aligned properly.
- if (IsCompilationProcess())
+#ifdef CROSSGEN_COMPILE
+ // Disable AOT compiling for the SIMD hardware intrinsic types. These types require special
+ // ABI handling as they represent fundamental data types (__m64, __m128, and __m256) and not
+ // aggregate or union types. See https://github.com/dotnet/coreclr/issues/15943
+ //
+ // Once they are properly handled according to the ABI requirements, we can remove this check
+ // and allow them to be used in crossgen/AOT scenarios.
+ //
+ // We can allow these to AOT compile in CoreLib since CoreLib versions with the runtime.
+ if (!IsNgenPDBCompilationProcess() &&
+ GetAppDomain()->ToCompilationDomain()->GetTargetModule() != g_pObjectClass->GetModule())
{
- // Disable AOT compiling for the SIMD hardware intrinsic types. These types require special
- // ABI handling as they represent fundamental data types (__m64, __m128, and __m256) and not
- // aggregate or union types. See https://github.com/dotnet/coreclr/issues/15943
- //
- // Once they are properly handled according to the ABI requirements, we can remove this check
- // and allow them to be used in crossgen/AOT scenarios.
COMPlusThrow(kTypeLoadException, IDS_EE_HWINTRINSIC_NGEN_DISALLOWED);
}
+#endif
if (strcmp(name, g_Vector64Name) == 0)
{
diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp
index 26d9c4e2cf..5e5d190346 100644
--- a/src/zap/zapper.cpp
+++ b/src/zap/zapper.cpp
@@ -1188,6 +1188,15 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo)
#endif // _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some
+ // of the hardware intrinsics.
+ if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib())
+ {
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD);
+ }
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
if ( m_pOpt->m_compilerFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_INFO)
&& m_pOpt->m_compilerFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_CODE)
&& m_pOpt->m_compilerFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_PROF_ENTERLEAVE))