author     Michal Strehovský <MichalStrehovsky@users.noreply.github.com>  2019-06-14 00:24:59 +0200
committer  Sergiy Kuryata <sergeyk@microsoft.com>  2019-06-13 15:24:59 -0700
commit     e73c8e6c4a98b7ef6335afa31052bed7313fd816 (patch)
tree       db14e93657a8ef482d51f8ad10f2fed720703a0e /src/zap
parent     6bcbd9348c277287539b2f97befc7b5d25b142e8 (diff)
Allow pregenerating most HW intrinsics in CoreLib (#24917)
* Allow pregenerating all HW intrinsics in CoreLib

This is a follow-up to #24689 that lets us pregenerate all hardware intrinsics in CoreLib.

We ensure the potentially unsupported code will never be reachable at runtime on CPUs that don't support it by not reporting the `IsSupported` property as intrinsic in crossgen. This ensures the support checks are always JITted. JITting the support checks is very cheap. There is a cost in the form of an extra call and the failure to do constant propagation of the return value, but the cost is negligible in practice and gets eliminated once the tiered JIT tiers the method up.

We only do this in CoreLib because user code might not guard intrinsic use with `IsSupported` checks, and pregenerating that code could lead to illegal instruction traps at runtime (instead of `PlatformNotSupportedException` throws) - a bad user experience.
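For context, the guard pattern this change protects is managed code branching on `IsSupported` before executing an intrinsic. A rough native analogue of that pattern, sketched here in C++ (illustrative only, not part of the commit; __builtin_cpu_supports is a GCC/Clang builtin, MSVC would need __cpuid instead):

    // Illustrative sketch (not from this commit): a runtime CPU feature guard,
    // the native analogue of `if (Sse41.IsSupported) { ... }` in managed code.
    #include <cstdio>

    int main()
    {
        if (__builtin_cpu_supports("sse4.1"))
            puts("guarded path: SSE4.1 instructions are safe to execute");
        else
            puts("fallback path: the SSE4.1 code must never run");
        return 0;
    }

If the check were instead baked to a constant `true` ahead of time - the crossgen equivalent of expanding `IsSupported` as an intrinsic - the guarded path could execute on an unsupported CPU and trap with an illegal instruction, which is exactly what this change prevents.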
Diffstat (limited to 'src/zap')
-rw-r--r--  src/zap/zapinfo.cpp  118
-rw-r--r--  src/zap/zapper.cpp    24
2 files changed, 138 insertions, 4 deletions
diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp
index 8d295a44ef..d45012d150 100644
--- a/src/zap/zapinfo.cpp
+++ b/src/zap/zapinfo.cpp
@@ -442,7 +442,9 @@ void ZapInfo::CompileMethod()
// this they can add the hint and reduce the perf cost at runtime.
m_pImage->m_pPreloader->PrePrepareMethodIfNecessary(m_currentMethodHandle);
- DWORD methodAttribs = getMethodAttribs(m_currentMethodHandle);
+ // Retrieve method attributes from EEJitInfo - the ZapInfo's version updates
+ // some of the flags related to hardware intrinsics but we don't want that.
+ DWORD methodAttribs = m_pEEJitInfo->getMethodAttribs(m_currentMethodHandle);
if (methodAttribs & CORINFO_FLG_AGGRESSIVE_OPT)
{
// Skip methods marked with MethodImplOptions.AggressiveOptimization, they will be jitted instead. In the future,
@@ -453,6 +455,27 @@ void ZapInfo::CompileMethod()
return;
}
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ if (methodAttribs & CORINFO_FLG_JIT_INTRINSIC)
+ {
+ // Skip generating hardware intrinsic method bodies.
+ //
+ // We don't know what the implementation should do (whether it can expand to the actual
+ // intrinsic instructions, or whether it should throw a PlatformNotSupportedException).
+
+ const char* namespaceName;
+ getMethodNameFromMetadata(m_currentMethodHandle, nullptr, &namespaceName, nullptr);
+ if (strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0
+ || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0
+ || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0)
+ {
+ if (m_zapper->m_pOpt->m_verbose)
+ m_zapper->Info(W("Skipped due to being a hardware intrinsic\n"));
+ return;
+ }
+ }
+#endif
+
m_jitFlags = ComputeJitFlags(m_currentMethodHandle);
#ifdef FEATURE_READYTORUN_COMPILER
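The namespace test in the hunk above is an inline strcmp chain. A minimal sketch of the same test factored into a standalone helper (hypothetical; the commit itself keeps the checks inline):

    // Hypothetical helper, equivalent to the namespace checks in
    // ZapInfo::CompileMethod above.
    #include <cstring>

    static bool IsHardwareIntrinsicNamespace(const char* namespaceName)
    {
        return strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0
            || strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0
            || strcmp(namespaceName, "System.Runtime.Intrinsics") == 0;
    }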
@@ -2089,6 +2112,94 @@ void ZapInfo::GetProfilingHandle(BOOL *pbHookFunction,
*pbIndirectedHandles = TRUE;
}
+//
+// This strips the CORINFO_FLG_JIT_INTRINSIC flag from some of the hardware intrinsic methods.
+//
+DWORD FilterHardwareIntrinsicMethodAttribs(DWORD attribs, CORINFO_METHOD_HANDLE ftn, ICorDynamicInfo* pJitInfo)
+{
+ if (attribs & CORINFO_FLG_JIT_INTRINSIC)
+ {
+ // Figure out which intrinsic we are dealing with.
+ const char* namespaceName;
+ const char* className;
+ const char* enclosingClassName;
+ const char* methodName = pJitInfo->getMethodNameFromMetadata(ftn, &className, &namespaceName, &enclosingClassName);
+
+ // Is this the get_IsSupported method that checks whether the intrinsic is supported?
+ bool fIsGetIsSupportedMethod = strcmp(methodName, "get_IsSupported") == 0;
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ bool fIsX86intrinsic = strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0;
+
+ // If it's anything related to Sse/Sse2, we can expand unconditionally since this is a baseline
+ // requirement of CoreCLR.
+ if (fIsX86intrinsic
+ && (
+ strcmp(className, "Sse") == 0 || strcmp(className, "Sse2") == 0
+ || (
+ strcmp(className, "X64") == 0
+ && (
+ strcmp(enclosingClassName, "Sse") == 0 || strcmp(enclosingClassName, "Sse2") == 0
+ )
+ )
+ )
+ )
+ {
+ return attribs;
+ }
+
+ // If it's an intrinsic that requires VEX encoding, do not report it as intrinsic
+ // so that the call becomes a regular method call.
+ // We don't allow RyuJIT to use VEX encoding at AOT compilation time, so these
+ // cannot be pregenerated. Not reporting them as intrinsic ensures the right
+ // thing happens at runtime (the called method will be JITted).
+ // It will be slower, but correct.
+ if (fIsX86intrinsic
+ && (
+ strcmp(className, "Avx") == 0 || strcmp(className, "Fma") == 0 || strcmp(className, "Avx2") == 0 || strcmp(className, "Bmi1") == 0 || strcmp(className, "Bmi2") == 0
+ || (
+ strcmp(className, "X64") == 0
+ && (
+ strcmp(enclosingClassName, "Bmi1") == 0 || strcmp(enclosingClassName, "Bmi2") == 0
+ )
+ )
+ )
+ )
+ {
+ // We do want the IsSupported for VEX instructions to be recognized as intrinsic so that the
+ // potentially worse quality code doesn't actually run until tiered JIT starts
+ // kicking in and recompiling methods. Reporting this as intrinsic makes RyuJIT expand it
+ // into `return false`.
+ if (fIsGetIsSupportedMethod)
+ return attribs;
+
+ // Treat other intrinsic methods as a regular method call (into a JITted method).
+ return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE;
+ }
+
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+
+ // Do not report the get_IsSupported method as an intrinsic if it's an intrinsic on the architecture
+ // we are targeting. This will turn the call into a regular call.
+ // We also make sure none of the hardware intrinsic method bodies get pregenerated in crossgen
+ // (see ZapInfo::CompileMethod) but get JITted instead. The JITted method will have the correct
+ // answer for the CPU the code is running on.
+ if (fIsGetIsSupportedMethod && (
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ fIsX86intrinsic ||
+#elif defined(_TARGET_ARM64_)
+ strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0 ||
+#endif
+ strcmp(namespaceName, "System.Runtime.Intrinsics") == 0))
+ {
+ // Treat as a regular method call (into a JITted method).
+ return (attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE;
+ }
+ }
+
+ return attribs;
+}
+
//return a callable stub that will do the virtual or interface call
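The expression `(attribs & ~CORINFO_FLG_JIT_INTRINSIC) | CORINFO_FLG_DONT_INLINE` used twice in the function above clears the intrinsic bit while setting the no-inline bit in a single step. A self-contained demonstration with made-up flag values (the real values live in corinfo.h):

    // Standalone demo of the flag rewrite above; the numeric values are invented.
    #include <cstdio>

    enum : unsigned
    {
        FLG_JIT_INTRINSIC = 0x1,  // stand-in for CORINFO_FLG_JIT_INTRINSIC
        FLG_DONT_INLINE   = 0x2,  // stand-in for CORINFO_FLG_DONT_INLINE
        FLG_OTHER         = 0x100 // some unrelated attribute bit
    };

    int main()
    {
        unsigned attribs = FLG_JIT_INTRINSIC | FLG_OTHER;
        // Strip the intrinsic bit and force the method to stay a real call.
        attribs = (attribs & ~FLG_JIT_INTRINSIC) | FLG_DONT_INLINE;
        printf("attribs = 0x%x\n", attribs); // prints attribs = 0x102
        return 0;
    }

Setting CORINFO_FLG_DONT_INLINE alongside matters: if the now-non-intrinsic method body were inlined into pregenerated code anyway, the caller would bake in whatever the AOT-compiled body does, defeating the point of deferring the method to the JIT.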
@@ -2114,6 +2225,8 @@ void ZapInfo::getCallInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
(CORINFO_CALLINFO_FLAGS)(flags | CORINFO_CALLINFO_KINDONLY),
pResult);
+ pResult->methodFlags = FilterHardwareIntrinsicMethodAttribs(pResult->methodFlags, pResult->hMethod, m_pEEJitInfo);
+
#ifdef FEATURE_READYTORUN_COMPILER
if (IsReadyToRunCompilation())
{
@@ -3696,7 +3809,8 @@ unsigned ZapInfo::getMethodHash(CORINFO_METHOD_HANDLE ftn)
DWORD ZapInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn)
{
- return m_pEEJitInfo->getMethodAttribs(ftn);
+ DWORD result = m_pEEJitInfo->getMethodAttribs(ftn);
+ return FilterHardwareIntrinsicMethodAttribs(result, ftn, m_pEEJitInfo);
}
void ZapInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn, CorInfoMethodRuntimeFlags attribs)
diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp
index 5e5d190346..e511b5726c 100644
--- a/src/zap/zapper.cpp
+++ b/src/zap/zapper.cpp
@@ -1189,11 +1189,31 @@ void Zapper::InitializeCompilerFlags(CORCOMPILE_VERSION_INFO * pVersionInfo)
#endif // _TARGET_X86_
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
- // If we're compiling CoreLib, allow RyuJIT to generate SIMD code so that we can expand some
- // of the hardware intrinsics.
+ // If we're crossgenning CoreLib, allow generating non-VEX intrinsics. The generated code might
+ // not actually be supported by the processor at runtime, so we compensate by
+ // not letting the get_IsSupported method be intrinsically expanded in crossgen
+ // (see the special handling around CORINFO_FLG_JIT_INTRINSIC in ZapInfo).
+ // That way the actual support checks will always be jitted.
+ // We only do this for CoreLib because forgetting to wrap intrinsics in IsSupported
+ // checks can lead to illegal instruction traps (instead of a nice managed exception).
if (m_pEECompileInfo->GetAssemblyModule(m_hAssembly) == m_pEECompileInfo->GetLoaderModuleForMscorlib())
{
m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD);
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_AES);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_PCLMULQDQ);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSSE3);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE41);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE42);
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_POPCNT);
+ // Leaving out CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, CORJIT_FLAGS::CORJIT_FLAG_USE_FMA
+ // CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, CORJIT_FLAGS::CORJIT_FLAG_USE_BMI1,
+ // CORJIT_FLAGS::CORJIT_FLAG_USE_BMI2 on purpose - these require VEX encodings
+ // and the JIT doesn't support generating code for methods with mixed encodings.
+ m_pOpt->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_USE_LZCNT);
+#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
}
#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
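The explicit run of Set() calls above makes the deliberately omitted VEX flags easy to spot. For comparison, a table-driven version is sketched below; this is hypothetical, with a mock flags type standing in for CORJIT_FLAGS:

    // Hypothetical sketch only: MockFlags stands in for CORJIT_FLAGS, and the
    // enumerators mirror the CORJIT_FLAG_USE_* names set in the diff above.
    #include <cstdio>

    enum CorJitFlag
    {
        FLAG_USE_AES, FLAG_USE_PCLMULQDQ, FLAG_USE_SSE3, FLAG_USE_SSSE3,
        FLAG_USE_SSE41, FLAG_USE_SSE42, FLAG_USE_POPCNT, FLAG_USE_LZCNT,
        FLAG_COUNT
    };

    struct MockFlags
    {
        bool bits[FLAG_COUNT] = {};
        void Set(CorJitFlag f) { bits[f] = true; }
    };

    int main()
    {
        // Baseline non-VEX ISAs only; AVX, FMA, AVX2, BMI1 and BMI2 are absent
        // on purpose - the JIT cannot mix VEX and non-VEX encodings in one method.
        static const CorJitFlag baseline[] =
        {
            FLAG_USE_AES, FLAG_USE_PCLMULQDQ, FLAG_USE_SSE3, FLAG_USE_SSSE3,
            FLAG_USE_SSE41, FLAG_USE_SSE42, FLAG_USE_POPCNT, FLAG_USE_LZCNT
        };

        MockFlags flags;
        for (CorJitFlag f : baseline)
            flags.Set(f);

        printf("%zu baseline ISA flags set\n", sizeof(baseline) / sizeof(baseline[0]));
        return 0;
    }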