Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/CMakeLists.txt | 27
-rw-r--r--  src/jit/ICorJitInfo_API_names.h | 171
-rw-r--r--  src/jit/ICorJitInfo_API_wrapper.hpp | 1666
-rw-r--r--  src/jit/assertionprop.cpp | 119
-rw-r--r--  src/jit/bitsetasuint64.h | 2
-rw-r--r--  src/jit/block.cpp | 37
-rw-r--r--  src/jit/block.h | 24
-rwxr-xr-x  src/jit/codegen.h | 36
-rw-r--r--  src/jit/codegenarm.cpp | 1078
-rw-r--r--  src/jit/codegenarm64.cpp | 1441
-rw-r--r--  src/jit/codegenclassic.h | 7
-rw-r--r-- [-rwxr-xr-x]  src/jit/codegencommon.cpp | 622
-rw-r--r--  src/jit/codegeninterface.h | 6
-rw-r--r--  src/jit/codegenlegacy.cpp | 448
-rw-r--r--  src/jit/codegenlinear.cpp | 1773
-rw-r--r--  src/jit/codegenlinear.h | 78
-rw-r--r--  src/jit/codegenxarch.cpp | 3899
-rw-r--r--  src/jit/compatjit/.gitmirror | 1
-rw-r--r--  src/jit/compatjit/CMakeLists.txt | 66
-rw-r--r--  src/jit/compiler.cpp | 1399
-rw-r--r--  src/jit/compiler.h | 297
-rw-r--r--  src/jit/compiler.hpp | 110
-rw-r--r--  src/jit/compphases.h | 15
-rw-r--r--  src/jit/crossgen/CMakeLists.txt | 4
-rw-r--r--  src/jit/decomposelongs.cpp | 1056
-rw-r--r--  src/jit/decomposelongs.h | 12
-rw-r--r--  src/jit/dll/CMakeLists.txt | 10
-rw-r--r--  src/jit/dll/jit.nativeproj | 6
-rw-r--r--  src/jit/earlyprop.cpp | 42
-rw-r--r-- [-rwxr-xr-x]  src/jit/ee_il_dll.cpp | 53
-rw-r--r--  src/jit/ee_il_dll.hpp | 4
-rw-r--r--  src/jit/emit.cpp | 80
-rw-r--r--  src/jit/emit.h | 27
-rw-r--r--  src/jit/emitarm.cpp | 20
-rw-r--r--  src/jit/emitarm64.cpp | 2
-rw-r--r--  src/jit/emitxarch.cpp | 710
-rw-r--r--  src/jit/emitxarch.h | 118
-rw-r--r--  src/jit/error.cpp | 4
-rw-r--r--  src/jit/error.h | 199
-rw-r--r--  src/jit/flowgraph.cpp | 2347
-rw-r--r--  src/jit/gcencode.cpp | 267
-rw-r--r--  src/jit/gentree.cpp | 1311
-rw-r--r--  src/jit/gentree.h | 563
-rw-r--r--  src/jit/gschecks.cpp | 48
-rw-r--r--  src/jit/gtlist.h | 333
-rw-r--r--  src/jit/gtstructs.h | 4
-rw-r--r--  src/jit/importer.cpp | 760
-rw-r--r--  src/jit/inline.cpp | 138
-rw-r--r--  src/jit/inline.def | 10
-rw-r--r--  src/jit/inline.h | 17
-rw-r--r--  src/jit/inlinepolicy.cpp | 232
-rw-r--r--  src/jit/inlinepolicy.h | 98
-rw-r--r--  src/jit/instr.cpp | 14
-rw-r--r--  src/jit/instr.h | 10
-rw-r--r--  src/jit/instrsxarch.h | 19
-rw-r--r--  src/jit/jit.h | 49
-rw-r--r--  src/jit/jit.settings.targets | 3
-rw-r--r--  src/jit/jitconfig.h | 2
-rw-r--r--  src/jit/jitconfigvalues.h | 50
-rw-r--r--  src/jit/jitee.h | 264
-rw-r--r--  src/jit/jiteh.cpp | 12
-rw-r--r--  src/jit/jitgcinfo.h | 3
-rw-r--r--  src/jit/lclvars.cpp | 97
-rw-r--r--  src/jit/legacyjit/.gitmirror | 1
-rw-r--r--  src/jit/legacyjit/CMakeLists.txt | 62
-rw-r--r--  src/jit/lir.cpp | 49
-rw-r--r--  src/jit/liveness.cpp | 37
-rw-r--r--  src/jit/loopcloning.cpp | 2
-rw-r--r--  src/jit/lower.cpp | 742
-rw-r--r--  src/jit/lower.h | 18
-rw-r--r--  src/jit/lowerarm.cpp | 138
-rw-r--r--  src/jit/lowerarm64.cpp | 149
-rw-r--r--  src/jit/lowerxarch.cpp | 1104
-rw-r--r--  src/jit/lsra.cpp | 1051
-rw-r--r--  src/jit/lsra.h | 87
-rw-r--r--  src/jit/morph.cpp | 1656
-rw-r--r--  src/jit/nodeinfo.h | 47
-rw-r--r--  src/jit/optcse.cpp | 88
-rw-r--r--  src/jit/optimizer.cpp | 688
-rw-r--r--  src/jit/protojit/CMakeLists.txt | 8
-rw-r--r--  src/jit/rangecheck.cpp | 5
-rw-r--r--  src/jit/rationalize.cpp | 250
-rw-r--r--  src/jit/regalloc.cpp | 208
-rw-r--r--  src/jit/regalloc.h | 24
-rw-r--r--  src/jit/registerfp.cpp | 6
-rw-r--r--  src/jit/regset.cpp | 10
-rw-r--r--  src/jit/scopeinfo.cpp | 18
-rw-r--r--  src/jit/sideeffects.h | 6
-rw-r--r--  src/jit/simd.cpp | 137
-rw-r--r--  src/jit/simd.h | 15
-rw-r--r--  src/jit/simdcodegenxarch.cpp | 698
-rw-r--r--  src/jit/simdintrinsiclist.h | 9
-rw-r--r--  src/jit/ssabuilder.cpp | 223
-rw-r--r--  src/jit/stackfp.cpp | 23
-rw-r--r--  src/jit/standalone/CMakeLists.txt | 21
-rw-r--r--  src/jit/target.h | 47
-rw-r--r--  src/jit/tinyarray.h | 2
-rw-r--r--  src/jit/unwindamd64.cpp | 15
-rw-r--r--  src/jit/utils.cpp | 24
-rw-r--r--  src/jit/valuenum.cpp | 246
-rw-r--r--  src/jit/valuenum.h | 59
-rw-r--r--  src/jit/valuenumfuncs.h | 4
102 files changed, 18309 insertions, 11988 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 6372e37852..96b8c496b9 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -7,9 +7,9 @@ include_directories("../inc")
# Enable the following for UNIX altjit on Windows
# add_definitions(-DALT_JIT)
-if (CLR_CMAKE_TARGET_ARCH_AMD64)
- add_definitions(-DFEATURE_SIMD)
- add_definitions(-DFEATURE_AVX_SUPPORT)
+if (CLR_CMAKE_TARGET_ARCH_AMD64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_PLATFORM_UNIX))
+ add_definitions(-DFEATURE_SIMD)
+ add_definitions(-DFEATURE_AVX_SUPPORT)
endif ()
@@ -23,6 +23,7 @@ set( JIT_SOURCES
bitset.cpp
block.cpp
codegencommon.cpp
+ codegenlinear.cpp
compiler.cpp
copyprop.cpp
disasm.cpp
@@ -194,19 +195,17 @@ endif()
add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE})
-set(JIT_BASE_NAME clrjit)
-if (CLR_BUILD_JIT32)
- set(JIT_BASE_NAME ryujit)
-endif()
-
-if(WIN32)
- add_definitions(-DFX_VER_INTERNALNAME_STR=${JIT_BASE_NAME}.dll)
-endif(WIN32)
-
add_subdirectory(dll)
add_subdirectory(crossgen)
add_subdirectory(standalone)
-if (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+if (CLR_CMAKE_PLATFORM_ARCH_ARM)
add_subdirectory(protojit)
-endif (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+endif (CLR_CMAKE_PLATFORM_ARCH_ARM)
+
+if (CLR_CMAKE_PLATFORM_ARCH_I386)
+ add_subdirectory(legacyjit)
+ if (NOT CLR_BUILD_JIT32)
+ add_subdirectory(compatjit)
+ endif ()
+endif (CLR_CMAKE_PLATFORM_ARCH_I386)
diff --git a/src/jit/ICorJitInfo_API_names.h b/src/jit/ICorJitInfo_API_names.h
new file mode 100644
index 0000000000..601afbdfe1
--- /dev/null
+++ b/src/jit/ICorJitInfo_API_names.h
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+DEF_CLR_API(getMethodAttribs)
+DEF_CLR_API(setMethodAttribs)
+DEF_CLR_API(getMethodSig)
+DEF_CLR_API(getMethodInfo)
+DEF_CLR_API(canInline)
+DEF_CLR_API(reportInliningDecision)
+DEF_CLR_API(canTailCall)
+DEF_CLR_API(reportTailCallDecision)
+DEF_CLR_API(getEHinfo)
+DEF_CLR_API(getMethodClass)
+DEF_CLR_API(getMethodModule)
+DEF_CLR_API(getMethodVTableOffset)
+DEF_CLR_API(getIntrinsicID)
+DEF_CLR_API(isInSIMDModule)
+DEF_CLR_API(getUnmanagedCallConv)
+DEF_CLR_API(pInvokeMarshalingRequired)
+DEF_CLR_API(satisfiesMethodConstraints)
+DEF_CLR_API(isCompatibleDelegate)
+DEF_CLR_API(isDelegateCreationAllowed)
+DEF_CLR_API(isInstantiationOfVerifiedGeneric)
+DEF_CLR_API(initConstraintsForVerification)
+DEF_CLR_API(canSkipMethodVerification)
+DEF_CLR_API(methodMustBeLoadedBeforeCodeIsRun)
+DEF_CLR_API(mapMethodDeclToMethodImpl)
+DEF_CLR_API(getGSCookie)
+DEF_CLR_API(resolveToken)
+DEF_CLR_API(tryResolveToken)
+DEF_CLR_API(findSig)
+DEF_CLR_API(findCallSiteSig)
+DEF_CLR_API(getTokenTypeAsHandle)
+DEF_CLR_API(canSkipVerification)
+DEF_CLR_API(isValidToken)
+DEF_CLR_API(isValidStringRef)
+DEF_CLR_API(shouldEnforceCallvirtRestriction)
+DEF_CLR_API(asCorInfoType)
+DEF_CLR_API(getClassName)
+DEF_CLR_API(appendClassName)
+DEF_CLR_API(isValueClass)
+DEF_CLR_API(canInlineTypeCheckWithObjectVTable)
+DEF_CLR_API(getClassAttribs)
+DEF_CLR_API(isStructRequiringStackAllocRetBuf)
+DEF_CLR_API(getClassModule)
+DEF_CLR_API(getModuleAssembly)
+DEF_CLR_API(getAssemblyName)
+DEF_CLR_API(LongLifetimeMalloc)
+DEF_CLR_API(LongLifetimeFree)
+DEF_CLR_API(getClassModuleIdForStatics)
+DEF_CLR_API(getClassSize)
+DEF_CLR_API(getClassAlignmentRequirement)
+DEF_CLR_API(getClassGClayout)
+DEF_CLR_API(getClassNumInstanceFields)
+DEF_CLR_API(getFieldInClass)
+DEF_CLR_API(checkMethodModifier)
+DEF_CLR_API(getNewHelper)
+DEF_CLR_API(getNewArrHelper)
+DEF_CLR_API(getCastingHelper)
+DEF_CLR_API(getSharedCCtorHelper)
+DEF_CLR_API(getSecurityPrologHelper)
+DEF_CLR_API(getTypeForBox)
+DEF_CLR_API(getBoxHelper)
+DEF_CLR_API(getUnBoxHelper)
+DEF_CLR_API(getReadyToRunHelper)
+DEF_CLR_API(getReadyToRunDelegateCtorHelper)
+DEF_CLR_API(getHelperName)
+DEF_CLR_API(initClass)
+DEF_CLR_API(classMustBeLoadedBeforeCodeIsRun)
+DEF_CLR_API(getBuiltinClass)
+DEF_CLR_API(getTypeForPrimitiveValueClass)
+DEF_CLR_API(canCast)
+DEF_CLR_API(areTypesEquivalent)
+DEF_CLR_API(mergeClasses)
+DEF_CLR_API(getParentType)
+DEF_CLR_API(getChildType)
+DEF_CLR_API(satisfiesClassConstraints)
+DEF_CLR_API(isSDArray)
+DEF_CLR_API(getArrayRank)
+DEF_CLR_API(getArrayInitializationData)
+DEF_CLR_API(canAccessClass)
+DEF_CLR_API(getFieldName)
+DEF_CLR_API(getFieldClass)
+DEF_CLR_API(getFieldType)
+DEF_CLR_API(getFieldOffset)
+DEF_CLR_API(isWriteBarrierHelperRequired)
+DEF_CLR_API(getFieldInfo)
+DEF_CLR_API(isFieldStatic)
+DEF_CLR_API(getBoundaries)
+DEF_CLR_API(setBoundaries)
+DEF_CLR_API(getVars)
+DEF_CLR_API(setVars)
+DEF_CLR_API(allocateArray)
+DEF_CLR_API(freeArray)
+DEF_CLR_API(getArgNext)
+DEF_CLR_API(getArgType)
+DEF_CLR_API(getArgClass)
+DEF_CLR_API(getHFAType)
+DEF_CLR_API(GetErrorHRESULT)
+DEF_CLR_API(GetErrorMessage)
+DEF_CLR_API(FilterException)
+DEF_CLR_API(HandleException)
+DEF_CLR_API(ThrowExceptionForJitResult)
+DEF_CLR_API(ThrowExceptionForHelper)
+DEF_CLR_API(getEEInfo)
+DEF_CLR_API(getJitTimeLogFilename)
+DEF_CLR_API(getMethodDefFromMethod)
+DEF_CLR_API(getMethodName)
+DEF_CLR_API(getMethodHash)
+DEF_CLR_API(findNameOfToken)
+DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor)
+DEF_CLR_API(getThreadTLSIndex)
+DEF_CLR_API(getInlinedCallFrameVptr)
+DEF_CLR_API(getAddrOfCaptureThreadGlobal)
+DEF_CLR_API(getAddrModuleDomainID)
+DEF_CLR_API(getHelperFtn)
+DEF_CLR_API(getFunctionEntryPoint)
+DEF_CLR_API(getFunctionFixedEntryPoint)
+DEF_CLR_API(getMethodSync)
+DEF_CLR_API(getLazyStringLiteralHelper)
+DEF_CLR_API(embedModuleHandle)
+DEF_CLR_API(embedClassHandle)
+DEF_CLR_API(embedMethodHandle)
+DEF_CLR_API(embedFieldHandle)
+DEF_CLR_API(embedGenericHandle)
+DEF_CLR_API(getLocationOfThisType)
+DEF_CLR_API(getPInvokeUnmanagedTarget)
+DEF_CLR_API(getAddressOfPInvokeFixup)
+DEF_CLR_API(getAddressOfPInvokeTarget)
+DEF_CLR_API(GetCookieForPInvokeCalliSig)
+DEF_CLR_API(canGetCookieForPInvokeCalliSig)
+DEF_CLR_API(getJustMyCodeHandle)
+DEF_CLR_API(GetProfilingHandle)
+DEF_CLR_API(getCallInfo)
+DEF_CLR_API(canAccessFamily)
+DEF_CLR_API(isRIDClassDomainID)
+DEF_CLR_API(getClassDomainID)
+DEF_CLR_API(getFieldAddress)
+DEF_CLR_API(getVarArgsHandle)
+DEF_CLR_API(canGetVarArgsHandle)
+DEF_CLR_API(constructStringLiteral)
+DEF_CLR_API(emptyStringLiteral)
+DEF_CLR_API(getFieldThreadLocalStoreID)
+DEF_CLR_API(setOverride)
+DEF_CLR_API(addActiveDependency)
+DEF_CLR_API(GetDelegateCtor)
+DEF_CLR_API(MethodCompileComplete)
+DEF_CLR_API(getTailCallCopyArgsThunk)
+DEF_CLR_API(getJitFlags)
+DEF_CLR_API(runWithErrorTrap)
+DEF_CLR_API(getMemoryManager)
+DEF_CLR_API(allocMem)
+DEF_CLR_API(reserveUnwindInfo)
+DEF_CLR_API(allocUnwindInfo)
+DEF_CLR_API(allocGCInfo)
+DEF_CLR_API(yieldExecution)
+DEF_CLR_API(setEHcount)
+DEF_CLR_API(setEHinfo)
+DEF_CLR_API(logMsg)
+DEF_CLR_API(doAssert)
+DEF_CLR_API(reportFatalError)
+DEF_CLR_API(allocBBProfileBuffer)
+DEF_CLR_API(getBBProfileData)
+DEF_CLR_API(recordCallSite)
+DEF_CLR_API(recordRelocation)
+DEF_CLR_API(getRelocTypeHint)
+DEF_CLR_API(getModuleNativeEntryPointRange)
+DEF_CLR_API(getExpectedTargetArchitecture)
+
+#undef DEF_CLR_API
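The header above is an X-macro list: every JIT-EE interface method appears exactly once as DEF_CLR_API(name), and the file #undefs the macro at the end so each consumer supplies its own expansion before including it. A minimal sketch of how such a list is typically consumed follows; the enum and table names are illustrative assumptions, and only the API_##name identifiers are implied by the API_ENTER/API_LEAVE macros in the wrapper that comes next.

// Sketch only: consumers redefine DEF_CLR_API before each include, because
// ICorJitInfo_API_names.h #undefs it at the end of the file.
enum ApiName
{
#define DEF_CLR_API(name) API_##name,   // one enumerator per JIT-EE API
#include "ICorJitInfo_API_names.h"
    API_COUNT                           // assumed sentinel, used to size tables
};

static const char* g_apiNames[] =
{
#define DEF_CLR_API(name) #name,        // parallel table of printable names
#include "ICorJitInfo_API_names.h"
};

Keeping the list in one header means the enum, the name table, and the wrapper methods cannot drift out of sync when an API is added or removed.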
diff --git a/src/jit/ICorJitInfo_API_wrapper.hpp b/src/jit/ICorJitInfo_API_wrapper.hpp
new file mode 100644
index 0000000000..4272b2755c
--- /dev/null
+++ b/src/jit/ICorJitInfo_API_wrapper.hpp
@@ -0,0 +1,1666 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#define API_ENTER(name) wrapComp->CLR_API_Enter(API_##name);
+#define API_LEAVE(name) wrapComp->CLR_API_Leave(API_##name);
+
+/**********************************************************************************/
+// clang-format off
+/**********************************************************************************/
+//
+// ICorMethodInfo
+//
+
+DWORD WrapICorJitInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn /* IN */)
+{
+ API_ENTER(getMethodAttribs)
+ DWORD temp = wrapHnd->getMethodAttribs(ftn);
+ API_LEAVE(getMethodAttribs)
+ return temp;
+}
+
+void WrapICorJitInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn,/* IN */
+ CorInfoMethodRuntimeFlags attribs/* IN */)
+{
+ API_ENTER(setMethodAttribs);
+ wrapHnd->setMethodAttribs(ftn, attribs);
+ API_LEAVE(setMethodAttribs);
+}
+
+void WrapICorJitInfo::getMethodSig(CORINFO_METHOD_HANDLE ftn, /* IN */
+ CORINFO_SIG_INFO *sig, /* OUT */
+ CORINFO_CLASS_HANDLE memberParent/* IN */)
+{
+ API_ENTER(getMethodSig);
+ wrapHnd->getMethodSig(ftn, sig, memberParent);
+ API_LEAVE(getMethodSig);
+}
+
+bool WrapICorJitInfo::getMethodInfo(
+ CORINFO_METHOD_HANDLE ftn, /* IN */
+ CORINFO_METHOD_INFO* info /* OUT */)
+{
+ API_ENTER(getMethodInfo);
+ bool temp = wrapHnd->getMethodInfo(ftn, info);
+ API_LEAVE(getMethodInfo);
+ return temp;
+}
+
+CorInfoInline WrapICorJitInfo::canInline(
+ CORINFO_METHOD_HANDLE callerHnd, /* IN */
+ CORINFO_METHOD_HANDLE calleeHnd, /* IN */
+ DWORD* pRestrictions /* OUT */)
+{
+ API_ENTER(canInline);
+ CorInfoInline temp = wrapHnd->canInline(callerHnd, calleeHnd, pRestrictions);
+ API_LEAVE(canInline);
+ return temp;
+}
+
+void WrapICorJitInfo::reportInliningDecision(CORINFO_METHOD_HANDLE inlinerHnd,
+ CORINFO_METHOD_HANDLE inlineeHnd,
+ CorInfoInline inlineResult,
+ const char * reason)
+{
+ API_ENTER(reportInliningDecision);
+ wrapHnd->reportInliningDecision(inlinerHnd, inlineeHnd, inlineResult, reason);
+ API_LEAVE(reportInliningDecision);
+}
+
+bool WrapICorJitInfo::canTailCall(
+ CORINFO_METHOD_HANDLE callerHnd, /* IN */
+ CORINFO_METHOD_HANDLE declaredCalleeHnd, /* IN */
+ CORINFO_METHOD_HANDLE exactCalleeHnd, /* IN */
+ bool fIsTailPrefix /* IN */)
+{
+ API_ENTER(canTailCall);
+ bool temp = wrapHnd->canTailCall(callerHnd, declaredCalleeHnd, exactCalleeHnd, fIsTailPrefix);
+ API_LEAVE(canTailCall);
+ return temp;
+}
+
+void WrapICorJitInfo::reportTailCallDecision(CORINFO_METHOD_HANDLE callerHnd,
+ CORINFO_METHOD_HANDLE calleeHnd,
+ bool fIsTailPrefix,
+ CorInfoTailCall tailCallResult,
+ const char * reason)
+{
+ API_ENTER(reportTailCallDecision);
+ wrapHnd->reportTailCallDecision(callerHnd, calleeHnd, fIsTailPrefix, tailCallResult, reason);
+ API_LEAVE(reportTailCallDecision);
+}
+
+void WrapICorJitInfo::getEHinfo(
+ CORINFO_METHOD_HANDLE ftn, /* IN */
+ unsigned EHnumber, /* IN */
+ CORINFO_EH_CLAUSE* clause /* OUT */)
+{
+ API_ENTER(getEHinfo);
+ wrapHnd->getEHinfo(ftn, EHnumber, clause);
+ API_LEAVE(getEHinfo);
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getMethodClass(
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(getMethodClass);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getMethodClass(method);
+ API_LEAVE(getMethodClass);
+ return temp;
+}
+
+CORINFO_MODULE_HANDLE WrapICorJitInfo::getMethodModule(
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(getMethodModule);
+ CORINFO_MODULE_HANDLE temp = wrapHnd->getMethodModule(method);
+ API_LEAVE(getMethodModule);
+ return temp;
+}
+
+void WrapICorJitInfo::getMethodVTableOffset(
+ CORINFO_METHOD_HANDLE method, /* IN */
+ unsigned* offsetOfIndirection, /* OUT */
+ unsigned* offsetAfterIndirection /* OUT */)
+{
+ API_ENTER(getMethodVTableOffset);
+ wrapHnd->getMethodVTableOffset(method, offsetOfIndirection, offsetAfterIndirection);
+ API_LEAVE(getMethodVTableOffset);
+}
+
+#if COR_JIT_EE_VERSION > 460
+
+CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(
+ CORINFO_METHOD_HANDLE method,
+ bool* pMustExpand /* OUT */)
+{
+ API_ENTER(getIntrinsicID);
+ CorInfoIntrinsics temp = wrapHnd->getIntrinsicID(method, pMustExpand);
+ API_LEAVE(getIntrinsicID);
+ return temp;
+}
+
+#else
+
+CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(getIntrinsicID);
+ CorInfoIntrinsics temp = wrapHnd->getIntrinsicID(method);
+ API_LEAVE(getIntrinsicID);
+ return temp;
+}
+
+#endif
+
+bool WrapICorJitInfo::isInSIMDModule(CORINFO_CLASS_HANDLE classHnd)
+{
+ API_ENTER(isInSIMDModule);
+ bool temp = wrapHnd->isInSIMDModule(classHnd);
+ API_LEAVE(isInSIMDModule);
+ return temp;
+}
+
+CorInfoUnmanagedCallConv WrapICorJitInfo::getUnmanagedCallConv(
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(getUnmanagedCallConv);
+ CorInfoUnmanagedCallConv temp = wrapHnd->getUnmanagedCallConv(method);
+ API_LEAVE(getUnmanagedCallConv);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::pInvokeMarshalingRequired(
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* callSiteSig)
+{
+ API_ENTER(pInvokeMarshalingRequired);
+ BOOL temp = wrapHnd->pInvokeMarshalingRequired(method, callSiteSig);
+ API_LEAVE(pInvokeMarshalingRequired);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::satisfiesMethodConstraints(
+ CORINFO_CLASS_HANDLE parent, // the exact parent of the method
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(satisfiesMethodConstraints);
+ BOOL temp = wrapHnd->satisfiesMethodConstraints(parent, method);
+ API_LEAVE(satisfiesMethodConstraints);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::isCompatibleDelegate(
+ CORINFO_CLASS_HANDLE objCls,
+ CORINFO_CLASS_HANDLE methodParentCls,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_CLASS_HANDLE delegateCls,
+ BOOL *pfIsOpenDelegate)
+{
+ API_ENTER(isCompatibleDelegate);
+ BOOL temp = wrapHnd->isCompatibleDelegate(objCls, methodParentCls, method, delegateCls, pfIsOpenDelegate);
+ API_LEAVE(isCompatibleDelegate);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::isDelegateCreationAllowed(
+ CORINFO_CLASS_HANDLE delegateHnd,
+ CORINFO_METHOD_HANDLE calleeHnd)
+{
+ API_ENTER(isDelegateCreationAllowed);
+ BOOL temp = wrapHnd->isDelegateCreationAllowed(delegateHnd, calleeHnd);
+ API_LEAVE(isDelegateCreationAllowed);
+ return temp;
+}
+
+
+CorInfoInstantiationVerification WrapICorJitInfo::isInstantiationOfVerifiedGeneric(
+ CORINFO_METHOD_HANDLE method /* IN */)
+{
+ API_ENTER(isInstantiationOfVerifiedGeneric);
+ CorInfoInstantiationVerification temp = wrapHnd->isInstantiationOfVerifiedGeneric(method);
+ API_LEAVE(isInstantiationOfVerifiedGeneric);
+ return temp;
+}
+
+void WrapICorJitInfo::initConstraintsForVerification(
+ CORINFO_METHOD_HANDLE method, /* IN */
+ BOOL *pfHasCircularClassConstraints, /* OUT */
+ BOOL *pfHasCircularMethodConstraint /* OUT */)
+{
+ API_ENTER(initConstraintsForVerification);
+ wrapHnd->initConstraintsForVerification(method, pfHasCircularClassConstraints, pfHasCircularMethodConstraint);
+ API_LEAVE(initConstraintsForVerification);
+}
+
+CorInfoCanSkipVerificationResult WrapICorJitInfo::canSkipMethodVerification(
+ CORINFO_METHOD_HANDLE ftnHandle)
+{
+ API_ENTER(canSkipMethodVerification);
+ CorInfoCanSkipVerificationResult temp = wrapHnd->canSkipMethodVerification(ftnHandle);
+ API_LEAVE(canSkipMethodVerification);
+ return temp;
+}
+
+void WrapICorJitInfo::methodMustBeLoadedBeforeCodeIsRun(
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(methodMustBeLoadedBeforeCodeIsRun);
+ wrapHnd->methodMustBeLoadedBeforeCodeIsRun(method);
+ API_LEAVE(methodMustBeLoadedBeforeCodeIsRun);
+}
+
+CORINFO_METHOD_HANDLE WrapICorJitInfo::mapMethodDeclToMethodImpl(
+ CORINFO_METHOD_HANDLE method)
+{
+ API_ENTER(mapMethodDeclToMethodImpl);
+ CORINFO_METHOD_HANDLE temp = wrapHnd->mapMethodDeclToMethodImpl(method);
+ API_LEAVE(mapMethodDeclToMethodImpl);
+ return temp;
+}
+
+void WrapICorJitInfo::getGSCookie(
+ GSCookie * pCookieVal,
+ GSCookie ** ppCookieVal )
+{
+ API_ENTER(getGSCookie);
+ wrapHnd->getGSCookie(pCookieVal, ppCookieVal);
+ API_LEAVE(getGSCookie);
+}
+
+/**********************************************************************************/
+//
+// ICorModuleInfo
+//
+/**********************************************************************************/
+
+void WrapICorJitInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken)
+{
+ API_ENTER(resolveToken);
+ wrapHnd->resolveToken(pResolvedToken);
+ API_LEAVE(resolveToken);
+}
+
+#if COR_JIT_EE_VERSION > 460
+
+bool WrapICorJitInfo::tryResolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken)
+{
+ API_ENTER(tryResolveToken);
+ bool success = wrapHnd->tryResolveToken(pResolvedToken);
+ API_LEAVE(tryResolveToken);
+ return success;
+}
+
+#endif
+
+void WrapICorJitInfo::findSig(
+ CORINFO_MODULE_HANDLE module,
+ unsigned sigTOK,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO *sig )
+{
+ API_ENTER(findSig);
+ wrapHnd->findSig(module, sigTOK, context, sig);
+ API_LEAVE(findSig);
+}
+
+void WrapICorJitInfo::findCallSiteSig(
+ CORINFO_MODULE_HANDLE module, /* IN */
+ unsigned methTOK, /* IN */
+ CORINFO_CONTEXT_HANDLE context, /* IN */
+ CORINFO_SIG_INFO *sig /* OUT */)
+{
+ API_ENTER(findCallSiteSig);
+ wrapHnd->findCallSiteSig(module, methTOK, context, sig);
+ API_LEAVE(findCallSiteSig);
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getTokenTypeAsHandle(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken /* IN */)
+{
+ API_ENTER(getTokenTypeAsHandle);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getTokenTypeAsHandle(pResolvedToken);
+ API_LEAVE(getTokenTypeAsHandle);
+ return temp;
+}
+
+CorInfoCanSkipVerificationResult WrapICorJitInfo::canSkipVerification(
+ CORINFO_MODULE_HANDLE module /* IN */)
+{
+ API_ENTER(canSkipVerification);
+ CorInfoCanSkipVerificationResult temp = wrapHnd->canSkipVerification(module);
+ API_LEAVE(canSkipVerification);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::isValidToken(
+ CORINFO_MODULE_HANDLE module, /* IN */
+ unsigned metaTOK /* IN */)
+{
+ API_ENTER(isValidToken);
+ BOOL result = wrapHnd->isValidToken(module, metaTOK);
+ API_LEAVE(isValidToken);
+ return result;
+}
+
+BOOL WrapICorJitInfo::isValidStringRef(
+ CORINFO_MODULE_HANDLE module, /* IN */
+ unsigned metaTOK /* IN */)
+{
+ API_ENTER(isValidStringRef);
+ BOOL temp = wrapHnd->isValidStringRef(module, metaTOK);
+ API_LEAVE(isValidStringRef);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::shouldEnforceCallvirtRestriction(
+ CORINFO_MODULE_HANDLE scope)
+{
+ API_ENTER(shouldEnforceCallvirtRestriction);
+ BOOL temp = wrapHnd->shouldEnforceCallvirtRestriction(scope);
+ API_LEAVE(shouldEnforceCallvirtRestriction);
+ return temp;
+}
+
+/**********************************************************************************/
+//
+// ICorClassInfo
+//
+/**********************************************************************************/
+
+CorInfoType WrapICorJitInfo::asCorInfoType(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(asCorInfoType);
+ CorInfoType temp = wrapHnd->asCorInfoType(cls);
+ API_LEAVE(asCorInfoType);
+ return temp;
+}
+
+const char* WrapICorJitInfo::getClassName(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getClassName);
+ const char* result = wrapHnd->getClassName(cls);
+ API_LEAVE(getClassName);
+ return result;
+}
+
+int WrapICorJitInfo::appendClassName(
+ __deref_inout_ecount(*pnBufLen) WCHAR** ppBuf,
+ int* pnBufLen,
+ CORINFO_CLASS_HANDLE cls,
+ BOOL fNamespace,
+ BOOL fFullInst,
+ BOOL fAssembly)
+{
+ API_ENTER(appendClassName);
+ WCHAR* pBuf = *ppBuf;
+ int nLen = wrapHnd->appendClassName(ppBuf, pnBufLen, cls, fNamespace, fFullInst, fAssembly);
+ API_LEAVE(appendClassName);
+ return nLen;
+}
+
+BOOL WrapICorJitInfo::isValueClass(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(isValueClass);
+ BOOL temp = wrapHnd->isValueClass(cls);
+ API_LEAVE(isValueClass);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::canInlineTypeCheckWithObjectVTable(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(canInlineTypeCheckWithObjectVTable);
+ BOOL temp = wrapHnd->canInlineTypeCheckWithObjectVTable(cls);
+ API_LEAVE(canInlineTypeCheckWithObjectVTable);
+ return temp;
+}
+
+DWORD WrapICorJitInfo::getClassAttribs(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getClassAttribs);
+ DWORD temp = wrapHnd->getClassAttribs(cls);
+ API_LEAVE(getClassAttribs);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::isStructRequiringStackAllocRetBuf(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(isStructRequiringStackAllocRetBuf);
+ BOOL temp = wrapHnd->isStructRequiringStackAllocRetBuf(cls);
+ API_LEAVE(isStructRequiringStackAllocRetBuf);
+ return temp;
+}
+
+CORINFO_MODULE_HANDLE WrapICorJitInfo::getClassModule(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getClassModule);
+ CORINFO_MODULE_HANDLE result = wrapHnd->getClassModule(cls);
+ API_LEAVE(getClassModule);
+ return result;
+}
+
+CORINFO_ASSEMBLY_HANDLE WrapICorJitInfo::getModuleAssembly(
+ CORINFO_MODULE_HANDLE mod)
+{
+ API_ENTER(getModuleAssembly);
+ CORINFO_ASSEMBLY_HANDLE result = wrapHnd->getModuleAssembly(mod);
+ API_LEAVE(getModuleAssembly);
+ return result;
+}
+
+const char* WrapICorJitInfo::getAssemblyName(
+ CORINFO_ASSEMBLY_HANDLE assem)
+{
+ API_ENTER(getAssemblyName);
+ const char* result = wrapHnd->getAssemblyName(assem);
+ API_LEAVE(getAssemblyName);
+ return result;
+}
+
+void* WrapICorJitInfo::LongLifetimeMalloc(size_t sz)
+{
+ API_ENTER(LongLifetimeMalloc);
+ void* result = wrapHnd->LongLifetimeMalloc(sz);
+ API_LEAVE(LongLifetimeMalloc);
+ return result;
+}
+
+void WrapICorJitInfo::LongLifetimeFree(void* obj)
+{
+ API_ENTER(LongLifetimeFree);
+ wrapHnd->LongLifetimeFree(obj);
+ API_LEAVE(LongLifetimeFree);
+}
+
+size_t WrapICorJitInfo::getClassModuleIdForStatics(
+ CORINFO_CLASS_HANDLE cls,
+ CORINFO_MODULE_HANDLE *pModule,
+ void **ppIndirection)
+{
+ API_ENTER(getClassModuleIdForStatics);
+ size_t temp = wrapHnd->getClassModuleIdForStatics(cls, pModule, ppIndirection);
+ API_LEAVE(getClassModuleIdForStatics);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getClassSize(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getClassSize);
+ unsigned temp = wrapHnd->getClassSize(cls);
+ API_LEAVE(getClassSize);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getClassAlignmentRequirement(
+ CORINFO_CLASS_HANDLE cls,
+ BOOL fDoubleAlignHint)
+{
+ API_ENTER(getClassAlignmentRequirement);
+ unsigned temp = wrapHnd->getClassAlignmentRequirement(cls, fDoubleAlignHint);
+ API_LEAVE(getClassAlignmentRequirement);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getClassGClayout(
+ CORINFO_CLASS_HANDLE cls, /* IN */
+ BYTE *gcPtrs /* OUT */)
+{
+ API_ENTER(getClassGClayout);
+ unsigned temp = wrapHnd->getClassGClayout(cls, gcPtrs);
+ API_LEAVE(getClassGClayout);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getClassNumInstanceFields(
+ CORINFO_CLASS_HANDLE cls /* IN */)
+{
+ API_ENTER(getClassNumInstanceFields);
+ unsigned temp = wrapHnd->getClassNumInstanceFields(cls);
+ API_LEAVE(getClassNumInstanceFields);
+ return temp;
+}
+
+CORINFO_FIELD_HANDLE WrapICorJitInfo::getFieldInClass(
+ CORINFO_CLASS_HANDLE clsHnd,
+ INT num)
+{
+ API_ENTER(getFieldInClass);
+ CORINFO_FIELD_HANDLE temp = wrapHnd->getFieldInClass(clsHnd, num);
+ API_LEAVE(getFieldInClass);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::checkMethodModifier(
+ CORINFO_METHOD_HANDLE hMethod,
+ LPCSTR modifier,
+ BOOL fOptional)
+{
+ API_ENTER(checkMethodModifier);
+ BOOL result = wrapHnd->checkMethodModifier(hMethod, modifier, fOptional);
+ API_LEAVE(checkMethodModifier);
+ return result;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getNewHelper(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CORINFO_METHOD_HANDLE callerHandle)
+{
+ API_ENTER(getNewHelper);
+ CorInfoHelpFunc temp = wrapHnd->getNewHelper(pResolvedToken, callerHandle);
+ API_LEAVE(getNewHelper);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getNewArrHelper(
+ CORINFO_CLASS_HANDLE arrayCls)
+{
+ API_ENTER(getNewArrHelper);
+ CorInfoHelpFunc temp = wrapHnd->getNewArrHelper(arrayCls);
+ API_LEAVE(getNewArrHelper);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getCastingHelper(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ bool fThrowing)
+{
+ API_ENTER(getCastingHelper);
+ CorInfoHelpFunc temp = wrapHnd->getCastingHelper(pResolvedToken, fThrowing);
+ API_LEAVE(getCastingHelper);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getSharedCCtorHelper(
+ CORINFO_CLASS_HANDLE clsHnd)
+{
+ API_ENTER(getSharedCCtorHelper);
+ CorInfoHelpFunc temp = wrapHnd->getSharedCCtorHelper(clsHnd);
+ API_LEAVE(getSharedCCtorHelper);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getSecurityPrologHelper(
+ CORINFO_METHOD_HANDLE ftn)
+{
+ API_ENTER(getSecurityPrologHelper);
+ CorInfoHelpFunc temp = wrapHnd->getSecurityPrologHelper(ftn);
+ API_LEAVE(getSecurityPrologHelper);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getTypeForBox(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getTypeForBox);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getTypeForBox(cls);
+ API_LEAVE(getTypeForBox);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getBoxHelper(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getBoxHelper);
+ CorInfoHelpFunc temp = wrapHnd->getBoxHelper(cls);
+ API_LEAVE(getBoxHelper);
+ return temp;
+}
+
+CorInfoHelpFunc WrapICorJitInfo::getUnBoxHelper(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getUnBoxHelper);
+ CorInfoHelpFunc temp = wrapHnd->getUnBoxHelper(cls);
+ API_LEAVE(getUnBoxHelper);
+ return temp;
+}
+
+#if COR_JIT_EE_VERSION > 460
+
+bool WrapICorJitInfo::getReadyToRunHelper(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CORINFO_LOOKUP_KIND * pGenericLookupKind,
+ CorInfoHelpFunc id,
+ CORINFO_CONST_LOOKUP * pLookup)
+{
+ API_ENTER(getReadyToRunHelper);
+ bool result = wrapHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup);
+ API_LEAVE(getReadyToRunHelper);
+ return result;
+}
+
+void WrapICorJitInfo::getReadyToRunDelegateCtorHelper(
+ CORINFO_RESOLVED_TOKEN * pTargetMethod,
+ CORINFO_CLASS_HANDLE delegateType,
+ CORINFO_CONST_LOOKUP * pLookup)
+{
+ API_ENTER(getReadyToRunDelegateCtorHelper);
+ wrapHnd->getReadyToRunDelegateCtorHelper(pTargetMethod, delegateType, pLookup);
+ API_LEAVE(getReadyToRunDelegateCtorHelper);
+}
+
+#else
+
+void WrapICorJitInfo::getReadyToRunHelper(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CorInfoHelpFunc id,
+ CORINFO_CONST_LOOKUP * pLookup)
+{
+ API_ENTER(getReadyToRunHelper);
+ wrapHnd->getReadyToRunHelper(pResolvedToken, id, pLookup);
+ API_LEAVE(getReadyToRunHelper);
+}
+
+#endif
+
+const char* WrapICorJitInfo::getHelperName(
+ CorInfoHelpFunc funcNum)
+{
+ API_ENTER(getHelperName);
+ const char* temp = wrapHnd->getHelperName(funcNum);
+ API_LEAVE(getHelperName);
+ return temp;
+}
+
+CorInfoInitClassResult WrapICorJitInfo::initClass(
+ CORINFO_FIELD_HANDLE field,
+
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_CONTEXT_HANDLE context,
+ BOOL speculative)
+{
+ API_ENTER(initClass);
+ CorInfoInitClassResult temp = wrapHnd->initClass(field, method, context, speculative);
+ API_LEAVE(initClass);
+ return temp;
+}
+
+void WrapICorJitInfo::classMustBeLoadedBeforeCodeIsRun(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(classMustBeLoadedBeforeCodeIsRun);
+ wrapHnd->classMustBeLoadedBeforeCodeIsRun(cls);
+ API_LEAVE(classMustBeLoadedBeforeCodeIsRun);
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getBuiltinClass(
+ CorInfoClassId classId)
+{
+ API_ENTER(getBuiltinClass);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getBuiltinClass(classId);
+ API_LEAVE(getBuiltinClass);
+ return temp;
+}
+
+CorInfoType WrapICorJitInfo::getTypeForPrimitiveValueClass(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getTypeForPrimitiveValueClass);
+ CorInfoType temp = wrapHnd->getTypeForPrimitiveValueClass(cls);
+ API_LEAVE(getTypeForPrimitiveValueClass);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::canCast(
+ CORINFO_CLASS_HANDLE child,
+ CORINFO_CLASS_HANDLE parent )
+{
+ API_ENTER(canCast);
+ BOOL temp = wrapHnd->canCast(child, parent);
+ API_LEAVE(canCast);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::areTypesEquivalent(
+ CORINFO_CLASS_HANDLE cls1,
+ CORINFO_CLASS_HANDLE cls2)
+{
+ API_ENTER(areTypesEquivalent);
+ BOOL temp = wrapHnd->areTypesEquivalent(cls1, cls2);
+ API_LEAVE(areTypesEquivalent);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::mergeClasses(
+ CORINFO_CLASS_HANDLE cls1,
+ CORINFO_CLASS_HANDLE cls2)
+{
+ API_ENTER(mergeClasses);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->mergeClasses(cls1, cls2);
+ API_LEAVE(mergeClasses);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getParentType(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getParentType);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getParentType(cls);
+ API_LEAVE(getParentType);
+ return temp;
+}
+
+CorInfoType WrapICorJitInfo::getChildType(
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_CLASS_HANDLE *clsRet)
+{
+ API_ENTER(getChildType);
+ CorInfoType temp = wrapHnd->getChildType(clsHnd, clsRet);
+ API_LEAVE(getChildType);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::satisfiesClassConstraints(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(satisfiesClassConstraints);
+ BOOL temp = wrapHnd->satisfiesClassConstraints(cls);
+ API_LEAVE(satisfiesClassConstraints);
+ return temp;
+
+}
+
+BOOL WrapICorJitInfo::isSDArray(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(isSDArray);
+ BOOL temp = wrapHnd->isSDArray(cls);
+ API_LEAVE(isSDArray);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getArrayRank(
+ CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(getArrayRank);
+ unsigned result = wrapHnd->getArrayRank(cls);
+ API_LEAVE(getArrayRank);
+ return result;
+}
+
+void * WrapICorJitInfo::getArrayInitializationData(
+ CORINFO_FIELD_HANDLE field,
+ DWORD size)
+{
+ API_ENTER(getArrayInitializationData);
+ void *temp = wrapHnd->getArrayInitializationData(field, size);
+ API_LEAVE(getArrayInitializationData);
+ return temp;
+}
+
+CorInfoIsAccessAllowedResult WrapICorJitInfo::canAccessClass(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CORINFO_METHOD_HANDLE callerHandle,
+ CORINFO_HELPER_DESC *pAccessHelper)
+{
+ API_ENTER(canAccessClass);
+ CorInfoIsAccessAllowedResult temp = wrapHnd->canAccessClass(pResolvedToken, callerHandle, pAccessHelper);
+ API_LEAVE(canAccessClass);
+ return temp;
+}
+
+/**********************************************************************************/
+//
+// ICorFieldInfo
+//
+/**********************************************************************************/
+
+const char* WrapICorJitInfo::getFieldName(
+ CORINFO_FIELD_HANDLE ftn, /* IN */
+ const char **moduleName /* OUT */)
+{
+ API_ENTER(getFieldName);
+ const char* temp = wrapHnd->getFieldName(ftn, moduleName);
+ API_LEAVE(getFieldName);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getFieldClass(
+ CORINFO_FIELD_HANDLE field)
+{
+ API_ENTER(getFieldClass);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getFieldClass(field);
+ API_LEAVE(getFieldClass);
+ return temp;
+}
+
+CorInfoType WrapICorJitInfo::getFieldType(
+ CORINFO_FIELD_HANDLE field,
+ CORINFO_CLASS_HANDLE *structType,
+ CORINFO_CLASS_HANDLE memberParent/* IN */)
+{
+ API_ENTER(getFieldType);
+ CorInfoType temp = wrapHnd->getFieldType(field, structType, memberParent);
+ API_LEAVE(getFieldType);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getFieldOffset(
+ CORINFO_FIELD_HANDLE field)
+{
+ API_ENTER(getFieldOffset);
+ unsigned temp = wrapHnd->getFieldOffset(field);
+ API_LEAVE(getFieldOffset);
+ return temp;
+}
+
+bool WrapICorJitInfo::isWriteBarrierHelperRequired(
+ CORINFO_FIELD_HANDLE field)
+{
+ API_ENTER(isWriteBarrierHelperRequired);
+ bool result = wrapHnd->isWriteBarrierHelperRequired(field);
+ API_LEAVE(isWriteBarrierHelperRequired);
+ return result;
+}
+
+void WrapICorJitInfo::getFieldInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CORINFO_METHOD_HANDLE callerHandle,
+ CORINFO_ACCESS_FLAGS flags,
+ CORINFO_FIELD_INFO *pResult)
+{
+ API_ENTER(getFieldInfo);
+ wrapHnd->getFieldInfo(pResolvedToken, callerHandle, flags, pResult);
+ API_LEAVE(getFieldInfo);
+}
+
+bool WrapICorJitInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd)
+{
+ API_ENTER(isFieldStatic);
+ bool result = wrapHnd->isFieldStatic(fldHnd);
+ API_LEAVE(isFieldStatic);
+ return result;
+}
+
+/*********************************************************************************/
+//
+// ICorDebugInfo
+//
+/*********************************************************************************/
+
+void WrapICorJitInfo::getBoundaries(
+ CORINFO_METHOD_HANDLE ftn,
+ unsigned int *cILOffsets,
+ DWORD **pILOffsets,
+
+ ICorDebugInfo::BoundaryTypes *implictBoundaries)
+{
+ API_ENTER(getBoundaries);
+ wrapHnd->getBoundaries(ftn, cILOffsets, pILOffsets, implictBoundaries);
+ API_LEAVE(getBoundaries);
+}
+
+void WrapICorJitInfo::setBoundaries(
+ CORINFO_METHOD_HANDLE ftn,
+ ULONG32 cMap,
+ ICorDebugInfo::OffsetMapping *pMap)
+{
+ API_ENTER(setBoundaries);
+ wrapHnd->setBoundaries(ftn, cMap, pMap);
+ API_LEAVE(setBoundaries);
+}
+
+void WrapICorJitInfo::getVars(
+ CORINFO_METHOD_HANDLE ftn,
+ ULONG32 *cVars,
+ ICorDebugInfo::ILVarInfo **vars,
+ bool *extendOthers)
+
+{
+ API_ENTER(getVars);
+ wrapHnd->getVars(ftn, cVars, vars, extendOthers);
+ API_LEAVE(getVars);
+}
+
+void WrapICorJitInfo::setVars(
+ CORINFO_METHOD_HANDLE ftn,
+ ULONG32 cVars,
+ ICorDebugInfo::NativeVarInfo *vars)
+
+{
+ API_ENTER(setVars);
+ wrapHnd->setVars(ftn, cVars, vars);
+ API_LEAVE(setVars);
+}
+
+void * WrapICorJitInfo::allocateArray(
+ ULONG cBytes)
+{
+ API_ENTER(allocateArray);
+ void *temp = wrapHnd->allocateArray(cBytes);
+ API_LEAVE(allocateArray);
+ return temp;
+}
+
+void WrapICorJitInfo::freeArray(
+ void *array)
+{
+ API_ENTER(freeArray);
+ wrapHnd->freeArray(array);
+ API_LEAVE(freeArray);
+}
+
+/*********************************************************************************/
+//
+// ICorArgInfo
+//
+/*********************************************************************************/
+
+CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext(
+ CORINFO_ARG_LIST_HANDLE args /* IN */)
+{
+ API_ENTER(getArgNext);
+ CORINFO_ARG_LIST_HANDLE temp = wrapHnd->getArgNext(args);
+ API_LEAVE(getArgNext);
+ return temp;
+}
+
+CorInfoTypeWithMod WrapICorJitInfo::getArgType(
+ CORINFO_SIG_INFO* sig, /* IN */
+ CORINFO_ARG_LIST_HANDLE args, /* IN */
+ CORINFO_CLASS_HANDLE *vcTypeRet /* OUT */)
+{
+ API_ENTER(getArgType);
+ CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet);
+ API_LEAVE(getArgType);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass(
+ CORINFO_SIG_INFO* sig, /* IN */
+ CORINFO_ARG_LIST_HANDLE args /* IN */)
+{
+ API_ENTER(getArgClass);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->getArgClass(sig, args);
+ API_LEAVE(getArgClass);
+ return temp;
+}
+
+CorInfoType WrapICorJitInfo::getHFAType(
+ CORINFO_CLASS_HANDLE hClass)
+{
+ API_ENTER(getHFAType);
+ CorInfoType temp = wrapHnd->getHFAType(hClass);
+ API_LEAVE(getHFAType);
+ return temp;
+}
+
+HRESULT WrapICorJitInfo::GetErrorHRESULT(
+ struct _EXCEPTION_POINTERS *pExceptionPointers)
+{
+ API_ENTER(GetErrorHRESULT);
+ HRESULT temp = wrapHnd->GetErrorHRESULT(pExceptionPointers);
+ API_LEAVE(GetErrorHRESULT);
+ return temp;
+}
+
+ULONG WrapICorJitInfo::GetErrorMessage(
+ __inout_ecount(bufferLength) LPWSTR buffer,
+ ULONG bufferLength)
+{
+ API_ENTER(GetErrorMessage);
+ ULONG temp = wrapHnd->GetErrorMessage(buffer, bufferLength);
+ API_LEAVE(GetErrorMessage);
+ return temp;
+}
+
+int WrapICorJitInfo::FilterException(
+ struct _EXCEPTION_POINTERS *pExceptionPointers)
+{
+ API_ENTER(FilterException);
+ int temp = wrapHnd->FilterException(pExceptionPointers);
+ API_LEAVE(FilterException);
+ return temp;
+}
+
+void WrapICorJitInfo::HandleException(
+ struct _EXCEPTION_POINTERS *pExceptionPointers)
+{
+ API_ENTER(HandleException);
+ wrapHnd->HandleException(pExceptionPointers);
+ API_LEAVE(HandleException);
+}
+
+void WrapICorJitInfo::ThrowExceptionForJitResult(
+ HRESULT result)
+{
+ API_ENTER(ThrowExceptionForJitResult);
+ wrapHnd->ThrowExceptionForJitResult(result);
+ API_LEAVE(ThrowExceptionForJitResult);
+}
+
+void WrapICorJitInfo::ThrowExceptionForHelper(
+ const CORINFO_HELPER_DESC * throwHelper)
+{
+ API_ENTER(ThrowExceptionForHelper);
+ wrapHnd->ThrowExceptionForHelper(throwHelper);
+ API_LEAVE(ThrowExceptionForHelper);
+}
+
+void WrapICorJitInfo::getEEInfo(
+ CORINFO_EE_INFO *pEEInfoOut)
+{
+ API_ENTER(getEEInfo);
+ wrapHnd->getEEInfo(pEEInfoOut);
+ API_LEAVE(getEEInfo);
+}
+
+LPCWSTR WrapICorJitInfo::getJitTimeLogFilename()
+{
+ API_ENTER(getJitTimeLogFilename);
+ LPCWSTR temp = wrapHnd->getJitTimeLogFilename();
+ API_LEAVE(getJitTimeLogFilename);
+ return temp;
+}
+
+mdMethodDef WrapICorJitInfo::getMethodDefFromMethod(
+ CORINFO_METHOD_HANDLE hMethod)
+{
+ API_ENTER(getMethodDefFromMethod);
+ mdMethodDef result = wrapHnd->getMethodDefFromMethod(hMethod);
+ API_LEAVE(getMethodDefFromMethod);
+ return result;
+}
+
+const char* WrapICorJitInfo::getMethodName(
+ CORINFO_METHOD_HANDLE ftn, /* IN */
+ const char **moduleName /* OUT */)
+{
+ API_ENTER(getMethodName);
+ const char* temp = wrapHnd->getMethodName(ftn, moduleName);
+ API_LEAVE(getMethodName);
+ return temp;
+}
+
+unsigned WrapICorJitInfo::getMethodHash(
+ CORINFO_METHOD_HANDLE ftn /* IN */)
+{
+ API_ENTER(getMethodHash);
+ unsigned temp = wrapHnd->getMethodHash(ftn);
+ API_LEAVE(getMethodHash);
+ return temp;
+}
+
+size_t WrapICorJitInfo::findNameOfToken(
+ CORINFO_MODULE_HANDLE module, /* IN */
+ mdToken metaTOK, /* IN */
+ __out_ecount(FQNameCapacity) char * szFQName, /* OUT */
+ size_t FQNameCapacity /* IN */)
+{
+ API_ENTER(findNameOfToken);
+ size_t result = wrapHnd->findNameOfToken(module, metaTOK, szFQName, FQNameCapacity);
+ API_LEAVE(findNameOfToken);
+ return result;
+}
+
+#if COR_JIT_EE_VERSION > 460
+
+bool WrapICorJitInfo::getSystemVAmd64PassStructInRegisterDescriptor(
+ /* IN */ CORINFO_CLASS_HANDLE structHnd,
+ /* OUT */ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
+{
+ API_ENTER(getSystemVAmd64PassStructInRegisterDescriptor);
+ bool result = wrapHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
+ API_LEAVE(getSystemVAmd64PassStructInRegisterDescriptor);
+ return result;
+}
+
+#endif
+
+DWORD WrapICorJitInfo::getThreadTLSIndex(
+ void **ppIndirection)
+{
+ API_ENTER(getThreadTLSIndex);
+ DWORD temp = wrapHnd->getThreadTLSIndex(ppIndirection);
+ API_LEAVE(getThreadTLSIndex);
+ return temp;
+}
+
+const void * WrapICorJitInfo::getInlinedCallFrameVptr(
+ void **ppIndirection)
+{
+ API_ENTER(getInlinedCallFrameVptr);
+ const void* temp = wrapHnd->getInlinedCallFrameVptr(ppIndirection);
+ API_LEAVE(getInlinedCallFrameVptr);
+ return temp;
+}
+
+LONG * WrapICorJitInfo::getAddrOfCaptureThreadGlobal(
+ void **ppIndirection)
+{
+ API_ENTER(getAddrOfCaptureThreadGlobal);
+ LONG * temp = wrapHnd->getAddrOfCaptureThreadGlobal(ppIndirection);
+ API_LEAVE(getAddrOfCaptureThreadGlobal);
+ return temp;
+}
+
+SIZE_T* WrapICorJitInfo::getAddrModuleDomainID(CORINFO_MODULE_HANDLE module)
+{
+ API_ENTER(getAddrModuleDomainID);
+ SIZE_T* result = wrapHnd->getAddrModuleDomainID(module);
+ API_LEAVE(getAddrModuleDomainID);
+ return result;
+}
+
+void* WrapICorJitInfo::getHelperFtn(
+ CorInfoHelpFunc ftnNum,
+ void **ppIndirection)
+{
+ API_ENTER(getHelperFtn);
+ void *temp = wrapHnd->getHelperFtn(ftnNum, ppIndirection);
+ API_LEAVE(getHelperFtn);
+ return temp;
+}
+
+void WrapICorJitInfo::getFunctionEntryPoint(
+ CORINFO_METHOD_HANDLE ftn, /* IN */
+ CORINFO_CONST_LOOKUP * pResult, /* OUT */
+ CORINFO_ACCESS_FLAGS accessFlags)
+{
+ API_ENTER(getFunctionEntryPoint);
+ wrapHnd->getFunctionEntryPoint(ftn, pResult, accessFlags);
+ API_LEAVE(getFunctionEntryPoint);
+}
+
+void WrapICorJitInfo::getFunctionFixedEntryPoint(
+ CORINFO_METHOD_HANDLE ftn,
+ CORINFO_CONST_LOOKUP * pResult)
+{
+ API_ENTER(getFunctionFixedEntryPoint);
+ wrapHnd->getFunctionFixedEntryPoint(ftn, pResult);
+ API_LEAVE(getFunctionFixedEntryPoint);
+}
+
+void* WrapICorJitInfo::getMethodSync(
+ CORINFO_METHOD_HANDLE ftn,
+ void **ppIndirection)
+{
+ API_ENTER(getMethodSync);
+ void *temp = wrapHnd->getMethodSync(ftn, ppIndirection);
+ API_LEAVE(getMethodSync);
+ return temp;
+}
+
+
+CorInfoHelpFunc WrapICorJitInfo::getLazyStringLiteralHelper(
+ CORINFO_MODULE_HANDLE handle)
+{
+ API_ENTER(getLazyStringLiteralHelper);
+ CorInfoHelpFunc temp = wrapHnd->getLazyStringLiteralHelper(handle);
+ API_LEAVE(getLazyStringLiteralHelper);
+ return temp;
+}
+
+CORINFO_MODULE_HANDLE WrapICorJitInfo::embedModuleHandle(
+ CORINFO_MODULE_HANDLE handle,
+ void **ppIndirection)
+{
+ API_ENTER(embedModuleHandle);
+ CORINFO_MODULE_HANDLE temp = wrapHnd->embedModuleHandle(handle, ppIndirection);
+ API_LEAVE(embedModuleHandle);
+ return temp;
+}
+
+CORINFO_CLASS_HANDLE WrapICorJitInfo::embedClassHandle(
+ CORINFO_CLASS_HANDLE handle,
+ void **ppIndirection)
+{
+ API_ENTER(embedClassHandle);
+ CORINFO_CLASS_HANDLE temp = wrapHnd->embedClassHandle(handle, ppIndirection);
+ API_LEAVE(embedClassHandle);
+ return temp;
+}
+
+CORINFO_METHOD_HANDLE WrapICorJitInfo::embedMethodHandle(
+ CORINFO_METHOD_HANDLE handle,
+ void **ppIndirection)
+{
+ API_ENTER(embedMethodHandle);
+ CORINFO_METHOD_HANDLE temp = wrapHnd->embedMethodHandle(handle, ppIndirection);
+ API_LEAVE(embedMethodHandle);
+ return temp;
+}
+
+CORINFO_FIELD_HANDLE WrapICorJitInfo::embedFieldHandle(
+ CORINFO_FIELD_HANDLE handle,
+ void **ppIndirection)
+{
+ API_ENTER(embedFieldHandle);
+ CORINFO_FIELD_HANDLE temp = wrapHnd->embedFieldHandle(handle, ppIndirection);
+ API_LEAVE(embedFieldHandle);
+ return temp;
+}
+
+void WrapICorJitInfo::embedGenericHandle(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ BOOL fEmbedParent,
+ CORINFO_GENERICHANDLE_RESULT * pResult)
+{
+ API_ENTER(embedGenericHandle);
+ wrapHnd->embedGenericHandle(pResolvedToken, fEmbedParent, pResult);
+ API_LEAVE(embedGenericHandle);
+}
+
+CORINFO_LOOKUP_KIND WrapICorJitInfo::getLocationOfThisType(
+ CORINFO_METHOD_HANDLE context)
+{
+ API_ENTER(getLocationOfThisType);
+ CORINFO_LOOKUP_KIND temp = wrapHnd->getLocationOfThisType(context);
+ API_LEAVE(getLocationOfThisType);
+ return temp;
+}
+
+void* WrapICorJitInfo::getPInvokeUnmanagedTarget(
+ CORINFO_METHOD_HANDLE method,
+ void **ppIndirection)
+{
+ API_ENTER(getPInvokeUnmanagedTarget);
+ void *result = wrapHnd->getPInvokeUnmanagedTarget(method, ppIndirection);
+ API_LEAVE(getPInvokeUnmanagedTarget);
+ return result;
+}
+
+void* WrapICorJitInfo::getAddressOfPInvokeFixup(
+ CORINFO_METHOD_HANDLE method,
+ void **ppIndirection)
+{
+ API_ENTER(getAddressOfPInvokeFixup);
+ void *temp = wrapHnd->getAddressOfPInvokeFixup(method, ppIndirection);
+ API_LEAVE(getAddressOfPInvokeFixup);
+ return temp;
+}
+
+#if COR_JIT_EE_VERSION > 460
+
+void WrapICorJitInfo::getAddressOfPInvokeTarget(
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_CONST_LOOKUP *pLookup)
+{
+ API_ENTER(getAddressOfPInvokeTarget);
+ wrapHnd->getAddressOfPInvokeTarget(method, pLookup);
+ API_LEAVE(getAddressOfPInvokeTarget);
+}
+
+#endif
+
+LPVOID WrapICorJitInfo::GetCookieForPInvokeCalliSig(
+ CORINFO_SIG_INFO* szMetaSig,
+ void ** ppIndirection)
+{
+ API_ENTER(GetCookieForPInvokeCalliSig);
+ LPVOID temp = wrapHnd->GetCookieForPInvokeCalliSig(szMetaSig, ppIndirection);
+ API_LEAVE(GetCookieForPInvokeCalliSig);
+ return temp;
+}
+
+bool WrapICorJitInfo::canGetCookieForPInvokeCalliSig(
+ CORINFO_SIG_INFO* szMetaSig)
+{
+ API_ENTER(canGetCookieForPInvokeCalliSig);
+ bool temp = wrapHnd->canGetCookieForPInvokeCalliSig(szMetaSig);
+ API_LEAVE(canGetCookieForPInvokeCalliSig);
+ return temp;
+}
+
+CORINFO_JUST_MY_CODE_HANDLE WrapICorJitInfo::getJustMyCodeHandle(
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_JUST_MY_CODE_HANDLE**ppIndirection)
+{
+ API_ENTER(getJustMyCodeHandle);
+ CORINFO_JUST_MY_CODE_HANDLE temp = wrapHnd->getJustMyCodeHandle(method, ppIndirection);
+ API_LEAVE(getJustMyCodeHandle);
+ return temp;
+}
+
+void WrapICorJitInfo::GetProfilingHandle(
+ BOOL *pbHookFunction,
+ void **pProfilerHandle,
+ BOOL *pbIndirectedHandles)
+{
+ API_ENTER(GetProfilingHandle);
+ wrapHnd->GetProfilingHandle(pbHookFunction, pProfilerHandle, pbIndirectedHandles);
+ API_LEAVE(GetProfilingHandle);
+}
+
+void WrapICorJitInfo::getCallInfo(
+ CORINFO_RESOLVED_TOKEN * pResolvedToken,
+ CORINFO_RESOLVED_TOKEN * pConstrainedResolvedToken,
+ CORINFO_METHOD_HANDLE callerHandle,
+ CORINFO_CALLINFO_FLAGS flags,
+ CORINFO_CALL_INFO *pResult)
+{
+ API_ENTER(getCallInfo);
+ wrapHnd->getCallInfo(pResolvedToken, pConstrainedResolvedToken, callerHandle, flags, pResult);
+ API_LEAVE(getCallInfo);
+}
+
+BOOL WrapICorJitInfo::canAccessFamily(CORINFO_METHOD_HANDLE hCaller,
+ CORINFO_CLASS_HANDLE hInstanceType)
+{
+ API_ENTER(canAccessFamily);
+ BOOL temp = wrapHnd->canAccessFamily(hCaller, hInstanceType);
+ API_LEAVE(canAccessFamily);
+ return temp;
+}
+
+BOOL WrapICorJitInfo::isRIDClassDomainID(CORINFO_CLASS_HANDLE cls)
+{
+ API_ENTER(isRIDClassDomainID);
+ BOOL result = wrapHnd->isRIDClassDomainID(cls);
+ API_LEAVE(isRIDClassDomainID);
+ return result;
+}
+
+unsigned WrapICorJitInfo::getClassDomainID(
+ CORINFO_CLASS_HANDLE cls,
+ void **ppIndirection)
+{
+ API_ENTER(getClassDomainID);
+ unsigned temp = wrapHnd->getClassDomainID(cls, ppIndirection);
+ API_LEAVE(getClassDomainID);
+ return temp;
+}
+
+void* WrapICorJitInfo::getFieldAddress(
+ CORINFO_FIELD_HANDLE field,
+ void **ppIndirection)
+{
+ API_ENTER(getFieldAddress);
+ void *temp = wrapHnd->getFieldAddress(field, ppIndirection);
+ API_LEAVE(getFieldAddress);
+ return temp;
+}
+
+CORINFO_VARARGS_HANDLE WrapICorJitInfo::getVarArgsHandle(
+ CORINFO_SIG_INFO *pSig,
+ void **ppIndirection)
+{
+ API_ENTER(getVarArgsHandle);
+ CORINFO_VARARGS_HANDLE temp = wrapHnd->getVarArgsHandle(pSig, ppIndirection);
+ API_LEAVE(getVarArgsHandle);
+ return temp;
+}
+
+bool WrapICorJitInfo::canGetVarArgsHandle(
+ CORINFO_SIG_INFO *pSig)
+{
+ API_ENTER(canGetVarArgsHandle);
+ bool temp = wrapHnd->canGetVarArgsHandle(pSig);
+ API_LEAVE(canGetVarArgsHandle);
+ return temp;
+}
+
+InfoAccessType WrapICorJitInfo::constructStringLiteral(
+ CORINFO_MODULE_HANDLE module,
+ mdToken metaTok,
+ void **ppValue)
+{
+ API_ENTER(constructStringLiteral);
+ InfoAccessType temp = wrapHnd->constructStringLiteral(module, metaTok, ppValue);
+ API_LEAVE(constructStringLiteral);
+ return temp;
+}
+
+InfoAccessType WrapICorJitInfo::emptyStringLiteral(void **ppValue)
+{
+ API_ENTER(emptyStringLiteral);
+ InfoAccessType temp = wrapHnd->emptyStringLiteral(ppValue);
+ API_LEAVE(emptyStringLiteral);
+ return temp;
+}
+
+DWORD WrapICorJitInfo::getFieldThreadLocalStoreID(
+ CORINFO_FIELD_HANDLE field,
+ void **ppIndirection)
+{
+ API_ENTER(getFieldThreadLocalStoreID);
+ DWORD temp = wrapHnd->getFieldThreadLocalStoreID(field, ppIndirection);
+ API_LEAVE(getFieldThreadLocalStoreID);
+ return temp;
+}
+
+void WrapICorJitInfo::setOverride(
+ ICorDynamicInfo *pOverride,
+ CORINFO_METHOD_HANDLE currentMethod)
+{
+ API_ENTER(setOverride);
+ wrapHnd->setOverride(pOverride, currentMethod);
+ API_LEAVE(setOverride);
+}
+
+void WrapICorJitInfo::addActiveDependency(
+ CORINFO_MODULE_HANDLE moduleFrom,
+ CORINFO_MODULE_HANDLE moduleTo)
+{
+ API_ENTER(addActiveDependency);
+ wrapHnd->addActiveDependency(moduleFrom, moduleTo);
+ API_LEAVE(addActiveDependency);
+}
+
+CORINFO_METHOD_HANDLE WrapICorJitInfo::GetDelegateCtor(
+ CORINFO_METHOD_HANDLE methHnd,
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE targetMethodHnd,
+ DelegateCtorArgs * pCtorData)
+{
+ API_ENTER(GetDelegateCtor);
+ CORINFO_METHOD_HANDLE temp = wrapHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, pCtorData);
+ API_LEAVE(GetDelegateCtor);
+ return temp;
+}
+
+void WrapICorJitInfo::MethodCompileComplete(
+ CORINFO_METHOD_HANDLE methHnd)
+{
+ API_ENTER(MethodCompileComplete);
+ wrapHnd->MethodCompileComplete(methHnd);
+ API_LEAVE(MethodCompileComplete);
+}
+
+void* WrapICorJitInfo::getTailCallCopyArgsThunk(
+ CORINFO_SIG_INFO *pSig,
+ CorInfoHelperTailCallSpecialHandling flags)
+{
+ API_ENTER(getTailCallCopyArgsThunk);
+ void *result = wrapHnd->getTailCallCopyArgsThunk(pSig, flags);
+ API_LEAVE(getTailCallCopyArgsThunk);
+ return result;
+}
+
+/*********************************************************************************/
+//
+// ICorJitInfo
+//
+/*********************************************************************************/
+
+#if COR_JIT_EE_VERSION > 460
+
+DWORD WrapICorJitInfo::getJitFlags(CORJIT_FLAGS *jitFlags, DWORD sizeInBytes)
+{
+ API_ENTER(getJitFlags);
+ DWORD result = wrapHnd->getJitFlags(jitFlags, sizeInBytes);
+ API_LEAVE(getJitFlags);
+ return result;
+}
+
+bool WrapICorJitInfo::runWithErrorTrap(void(*function)(void*), void *param)
+{
+ return wrapHnd->runWithErrorTrap(function, param);
+}
+
+#endif
+
+IEEMemoryManager* WrapICorJitInfo::getMemoryManager()
+{
+ API_ENTER(getMemoryManager);
+ IEEMemoryManager * temp = wrapHnd->getMemoryManager();
+ API_LEAVE(getMemoryManager);
+ return temp;
+}
+
+void WrapICorJitInfo::allocMem(
+ ULONG hotCodeSize, /* IN */
+ ULONG coldCodeSize, /* IN */
+ ULONG roDataSize, /* IN */
+ ULONG xcptnsCount, /* IN */
+ CorJitAllocMemFlag flag, /* IN */
+ void ** hotCodeBlock, /* OUT */
+ void ** coldCodeBlock, /* OUT */
+ void ** roDataBlock /* OUT */)
+{
+ API_ENTER(allocMem);
+ wrapHnd->allocMem(hotCodeSize, coldCodeSize, roDataSize, xcptnsCount, flag, hotCodeBlock, coldCodeBlock, roDataBlock);
+ API_LEAVE(allocMem);
+}
+
+void WrapICorJitInfo::reserveUnwindInfo(
+ BOOL isFunclet, /* IN */
+ BOOL isColdCode, /* IN */
+ ULONG unwindSize /* IN */)
+{
+ API_ENTER(reserveUnwindInfo);
+ wrapHnd->reserveUnwindInfo(isFunclet, isColdCode, unwindSize);
+ API_LEAVE(reserveUnwindInfo);
+}
+
+void WrapICorJitInfo::allocUnwindInfo(
+ BYTE * pHotCode, /* IN */
+ BYTE * pColdCode, /* IN */
+ ULONG startOffset, /* IN */
+ ULONG endOffset, /* IN */
+ ULONG unwindSize, /* IN */
+ BYTE * pUnwindBlock, /* IN */
+ CorJitFuncKind funcKind /* IN */)
+{
+ API_ENTER(allocUnwindInfo);
+ wrapHnd->allocUnwindInfo(pHotCode, pColdCode, startOffset, endOffset, unwindSize, pUnwindBlock, funcKind);
+ API_LEAVE(allocUnwindInfo);
+}
+
+void *WrapICorJitInfo::allocGCInfo(size_t size /* IN */)
+{
+ API_ENTER(allocGCInfo);
+ void *temp = wrapHnd->allocGCInfo(size);
+ API_LEAVE(allocGCInfo);
+ return temp;
+}
+
+void WrapICorJitInfo::yieldExecution()
+{
+ API_ENTER(yieldExecution); //Nothing to record
+ wrapHnd->yieldExecution();
+ API_LEAVE(yieldExecution); //Nothing to record
+}
+
+void WrapICorJitInfo::setEHcount(unsigned cEH /* IN */)
+{
+ API_ENTER(setEHcount);
+ wrapHnd->setEHcount(cEH);
+ API_LEAVE(setEHcount);
+}
+
+void WrapICorJitInfo::setEHinfo(
+ unsigned EHnumber, /* IN */
+ const CORINFO_EH_CLAUSE *clause /* IN */)
+{
+ API_ENTER(setEHinfo);
+ wrapHnd->setEHinfo(EHnumber, clause);
+ API_LEAVE(setEHinfo);
+}
+
+BOOL WrapICorJitInfo::logMsg(unsigned level, const char* fmt, va_list args)
+{
+ API_ENTER(logMsg);
+ BOOL result = wrapHnd->logMsg(level, fmt, args);
+ API_LEAVE(logMsg);
+ return result;
+}
+
+int WrapICorJitInfo::doAssert(const char* szFile, int iLine, const char* szExpr)
+{
+ API_ENTER(doAssert);
+ int result = wrapHnd->doAssert(szFile, iLine, szExpr);
+ API_LEAVE(doAssert);
+ return result;
+}
+
+void WrapICorJitInfo::reportFatalError(CorJitResult result)
+{
+ API_ENTER(reportFatalError);
+ wrapHnd->reportFatalError(result);
+ API_LEAVE(reportFatalError);
+}
+
+HRESULT WrapICorJitInfo::allocBBProfileBuffer(
+ ULONG count,
+ ProfileBuffer **profileBuffer)
+{
+ API_ENTER(allocBBProfileBuffer);
+ HRESULT result = wrapHnd->allocBBProfileBuffer(count, profileBuffer);
+ API_LEAVE(allocBBProfileBuffer);
+ return result;
+}
+
+HRESULT WrapICorJitInfo::getBBProfileData(
+ CORINFO_METHOD_HANDLE ftnHnd,
+ ULONG *count,
+ ProfileBuffer **profileBuffer,
+ ULONG *numRuns)
+{
+ API_ENTER(getBBProfileData);
+ HRESULT temp = wrapHnd->getBBProfileData(ftnHnd, count, profileBuffer, numRuns);
+ API_LEAVE(getBBProfileData);
+ return temp;
+}
+
+void WrapICorJitInfo::recordCallSite(
+ ULONG instrOffset, /* IN */
+ CORINFO_SIG_INFO * callSig, /* IN */
+ CORINFO_METHOD_HANDLE methodHandle /* IN */)
+{
+ API_ENTER(recordCallSite);
+ wrapHnd->recordCallSite(instrOffset, callSig, methodHandle);
+ API_LEAVE(recordCallSite);
+}
+
+void WrapICorJitInfo::recordRelocation(
+ void *location, /* IN */
+ void *target, /* IN */
+ WORD fRelocType, /* IN */
+ WORD slotNum, /* IN */
+ INT32 addlDelta /* IN */)
+{
+ API_ENTER(recordRelocation);
+ wrapHnd->recordRelocation(location, target, fRelocType, slotNum, addlDelta);
+ API_LEAVE(recordRelocation);
+}
+
+WORD WrapICorJitInfo::getRelocTypeHint(void *target)
+{
+ API_ENTER(getRelocTypeHint);
+ WORD result = wrapHnd->getRelocTypeHint(target);
+ API_LEAVE(getRelocTypeHint);
+ return result;
+}
+
+void WrapICorJitInfo::getModuleNativeEntryPointRange(
+ void **pStart, /* OUT */
+ void **pEnd /* OUT */)
+{
+ API_ENTER(getModuleNativeEntryPointRange);
+ wrapHnd->getModuleNativeEntryPointRange(pStart, pEnd);
+ API_LEAVE(getModuleNativeEntryPointRange);
+}
+
+DWORD WrapICorJitInfo::getExpectedTargetArchitecture()
+{
+ API_ENTER(getExpectedTargetArchitecture);
+ DWORD result = wrapHnd->getExpectedTargetArchitecture();
+ API_LEAVE(getExpectedTargetArchitecture);
+ return result;
+}
+
+/**********************************************************************************/
+// clang-format on
+/**********************************************************************************/
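Each wrapper method above brackets the delegated ICorJitInfo call with API_ENTER/API_LEAVE, which expand to wrapComp->CLR_API_Enter(API_##name) and wrapComp->CLR_API_Leave(API_##name). Below is a sketch of the kind of per-API bookkeeping such hooks can feed, reusing the ApiName enum sketched earlier; the struct, its field names, and the clock choice are illustrative assumptions, not the Compiler's actual implementation.

#include <chrono>
#include <cstdint>

// Sketch only: accumulates call counts and wall time per JIT-EE API.
// Assumes calls are not nested (the wrapper pairs Enter/Leave around a
// single outgoing call), so one in-flight timestamp suffices.
struct ApiStats
{
    unsigned callCount[API_COUNT] = {};
    uint64_t nanos[API_COUNT]     = {};
    std::chrono::steady_clock::time_point enterStamp;

    void Enter(ApiName api)
    {
        callCount[api]++;
        enterStamp = std::chrono::steady_clock::now();
    }

    void Leave(ApiName api)
    {
        auto elapsed = std::chrono::steady_clock::now() - enterStamp;
        nanos[api] += std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
    }
};

Note that the one method forwarded without Enter/Leave above, runWithErrorTrap, runs arbitrary JIT code that may itself call back through the wrapper, which is consistent with the non-nesting assumption in this sketch.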
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index fe35c3b780..cb0832fe47 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -1100,11 +1100,6 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
CNS_COMMON:
{
- // TODO-1stClassStructs: handle constant propagation to struct types.
- if (varTypeIsStruct(lclVar))
- {
- goto DONE_ASSERTION;
- }
//
// Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
//
@@ -2034,12 +2029,7 @@ void Compiler::optAssertionGen(GenTreePtr tree)
{
case GT_ASG:
// VN takes care of non local assertions for assignments and data flow.
- // TODO-1stClassStructs: Enable assertion prop for struct types.
- if (varTypeIsStruct(tree))
- {
- // Do nothing.
- }
- else if (optLocalAssertionProp)
+ if (optLocalAssertionProp)
{
assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL);
}
@@ -2052,26 +2042,15 @@ void Compiler::optAssertionGen(GenTreePtr tree)
case GT_OBJ:
case GT_BLK:
case GT_DYN_BLK:
- // TODO-1stClassStructs: These should always be considered to create a non-null
- // assertion, but previously, when these indirections were implicit due to a block
- // copy or init, they were not being considered to do so.
- break;
case GT_IND:
- // TODO-1stClassStructs: All indirections should be considered to create a non-null
- // assertion, but previously, when these indirections were implicit due to a block
- // copy or init, they were not being considered to do so.
- if (tree->gtType == TYP_STRUCT)
- {
- GenTree* parent = tree->gtGetParent(nullptr);
- if ((parent != nullptr) && (parent->gtOper == GT_ASG))
- {
- break;
- }
- }
case GT_NULLCHECK:
+ // All indirections create non-null assertions
+ assertionIndex = optCreateAssertion(tree->AsIndir()->Addr(), nullptr, OAK_NOT_EQUAL);
+ break;
+
case GT_ARR_LENGTH:
- // An array length can create a non-null assertion
- assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL);
+ // An array length is an indirection (but doesn't derive from GenTreeIndir).
+ assertionIndex = optCreateAssertion(tree->AsArrLen()->ArrRef(), nullptr, OAK_NOT_EQUAL);
break;
case GT_ARR_BOUNDS_CHECK:
@@ -2629,9 +2608,29 @@ GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion,
else
{
bool isArrIndex = ((tree->gtFlags & GTF_VAR_ARR_INDEX) != 0);
- newTree->ChangeOperConst(GT_CNS_INT);
- newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal;
- newTree->ClearIconHandleMask();
+ // If we have done constant propagation of a struct type, it is only valid for zero-init,
+ // and we have to ensure that we have the right zero for the type.
+ if (varTypeIsStruct(tree))
+ {
+ assert(curAssertion->op2.u1.iconVal == 0);
+ }
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(tree))
+ {
+ var_types simdType = tree->TypeGet();
+ tree->ChangeOperConst(GT_CNS_DBL);
+ GenTree* initVal = tree;
+ initVal->gtType = TYP_FLOAT;
+ newTree =
+ gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_FLOAT, genTypeSize(simdType));
+ }
+ else
+#endif // FEATURE_SIMD
+ {
+ newTree->ChangeOperConst(GT_CNS_INT);
+ newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal;
+ newTree->ClearIconHandleMask();
+ }
// If we're doing an array index address, assume any constant propagated contributes to the index.
if (isArrIndex)
{
@@ -3421,32 +3420,13 @@ GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const Gen
{
assert(tree->OperIsIndir());
- // TODO-1stClassStructs: All indirections should be handled here, but
- // previously, when these indirections were GT_OBJ, or implicit due to a block
- // copy or init, they were not being handled.
- if (tree->TypeGet() == TYP_STRUCT)
- {
- if (tree->OperIsBlk())
- {
- return nullptr;
- }
- else
- {
- GenTree* parent = tree->gtGetParent(nullptr);
- if ((parent != nullptr) && parent->OperIsBlkOp())
- {
- return nullptr;
- }
- }
- }
-
if (!(tree->gtFlags & GTF_EXCEPT))
{
return nullptr;
}
// Check for add of a constant.
- GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op1 = tree->AsIndir()->Addr();
if ((op1->gtOper == GT_ADD) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT))
{
op1 = op1->gtOp.gtOp1;
@@ -3700,6 +3680,21 @@ GenTreePtr Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const
assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
+ if (JitConfig.JitNoRangeChks())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFlagging check redundant due to JitNoRangeChks in BB%02u:\n", compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+ tree->gtFlags |= GTF_ARR_BOUND_INBND;
+ return nullptr;
+ }
+#endif // FEATURE_ENABLE_NO_RANGE_CHECKS
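+ // Note: JitConfig.JitNoRangeChks() reads a JIT config switch; in a typical CoreCLR build such
+ // switches are surfaced as environment variables (plausibly COMPlus_JitNoRangeChks=1; the exact
+ // name is an assumption here), and this path is compiled in only when
+ // FEATURE_ENABLE_NO_RANGE_CHECKS is defined.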
+
BitVecOps::Iter iter(apTraits, assertions);
unsigned index = 0;
while (iter.NextElem(apTraits, &index))
@@ -4688,9 +4683,8 @@ GenTreePtr Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt
newStmt = fgInsertStmtNearEnd(block, sideEffList);
sideEffList = nullptr;
}
- fgMorphBlockStmt(block, newStmt DEBUGARG(__FUNCTION__));
- gtSetStmtInfo(newStmt);
- fgSetStmtSeq(newStmt);
+
+ fgMorphBlockStmt(block, newStmt->AsStmt() DEBUGARG(__FUNCTION__));
}
// Transform the relop's operands to be both zeroes.
@@ -4748,7 +4742,6 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Gen
case GT_MOD:
case GT_UDIV:
case GT_UMOD:
- case GT_MULHI:
case GT_EQ:
case GT_NE:
case GT_LT:
@@ -4767,6 +4760,10 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Gen
case GT_INTRINSIC:
break;
+ case GT_MULHI:
+ assert(false && "Unexpected GT_MULHI node encountered before lowering");
+ break;
+
case GT_JTRUE:
break;
@@ -4911,9 +4908,7 @@ GenTreePtr Compiler::optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stm
if (optAssertionPropagatedCurrentStmt)
{
- fgMorphBlockStmt(block, stmt DEBUGARG("optVNAssertionPropCurStmt"));
- gtSetStmtInfo(stmt);
- fgSetStmtSeq(stmt);
+ fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("optVNAssertionPropCurStmt"));
}
// Check if propagation removed statements starting from current stmt.
@@ -5110,13 +5105,7 @@ void Compiler::optAssertionPropMain()
}
#endif
// Re-morph the statement.
- fgMorphBlockStmt(block, stmt DEBUGARG("optAssertionPropMain"));
-
- // Recalculate the gtCostSz, etc...
- gtSetStmtInfo(stmt);
-
- // Re-thread the nodes
- fgSetStmtSeq(stmt);
+ fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("optAssertionPropMain"));
}
// Check if propagation removed statements starting from current stmt.
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h
index 150f7e9d61..243e9e33b4 100644
--- a/src/jit/bitsetasuint64.h
+++ b/src/jit/bitsetasuint64.h
@@ -167,7 +167,7 @@ public:
{
IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
const int CharsForUINT64 = sizeof(UINT64) * 2;
- char* res = NULL;
+ char* res = nullptr;
const int AllocSize = CharsForUINT64 + 4;
res = (char*)alloc->Alloc(AllocSize);
UINT64 bits = bs;
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
index 2d37754ec5..47f1052cc8 100644
--- a/src/jit/block.cpp
+++ b/src/jit/block.cpp
@@ -554,7 +554,9 @@ void BasicBlock::dspBlockHeader(Compiler* compiler,
}
if (showFlags)
{
- printf(" flags=0x%08x: ", bbFlags);
+ const unsigned lowFlags = (unsigned)bbFlags;
+ const unsigned highFlags = (unsigned)(bbFlags >> 32);
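+ // For example, a block whose bbFlags is 0x0000000100000004 is displayed as
+ // "flags=0x00000001.00000004:".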
+ printf(" flags=0x%08x.%08x: ", highFlags, lowFlags);
dspFlags();
}
printf("\n");
@@ -568,7 +570,25 @@ void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp)
return comp->compGetMem(sz, CMK_HeapPhiArg);
}
-void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from)
+//------------------------------------------------------------------------
+// CloneBlockState: Try to populate `to` block with a copy of `from` block's statements, replacing
+// uses of local `varNum` with IntCns `varVal`.
+//
+// Arguments:
+// compiler - Jit compiler instance
+// to - New/empty block to copy statements into
+// from - Block to copy statements from
+// varNum - lclVar uses with lclNum `varNum` will be replaced; can be ~0 to indicate no replacement.
+// varVal - If replacing uses of `varNum`, replace them with int constants with value `varVal`.
+//
+// Return Value:
+// Cloning may fail because this routine uses `gtCloneExpr` for cloning and it can't handle all
+// IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be
+// partially populated. If cloning of all statements succeeds, `true` will be returned and
+// block `to` will be fully populated.
+
+bool BasicBlock::CloneBlockState(
+ Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal)
{
assert(to->bbTreeList == nullptr);
@@ -595,9 +615,17 @@ void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const Basic
for (GenTreePtr fromStmt = from->bbTreeList; fromStmt != nullptr; fromStmt = fromStmt->gtNext)
{
- compiler->fgInsertStmtAtEnd(to,
- compiler->fgNewStmtFromTree(compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr)));
+ auto newExpr = compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr, 0, varNum, varVal);
+ if (!newExpr)
+ {
+ // gtCloneExpr doesn't handle all opcodes, so may fail to clone a statement.
+ // When that happens, it returns nullptr; abandon the rest of this block and
+ // return `false` to the caller to indicate that cloning was unsuccessful.
+ return false;
+ }
+ compiler->fgInsertStmtAtEnd(to, compiler->fgNewStmtFromTree(newExpr));
}
+ return true;
}
// LIR helpers
@@ -667,7 +695,6 @@ GenTreeStmt* BasicBlock::lastStmt()
return result->AsStmt();
}
-
//------------------------------------------------------------------------
// BasicBlock::firstNode: Returns the first node in the block.
//
diff --git a/src/jit/block.h b/src/jit/block.h
index ecfbb620a1..99c0efc1a7 100644
--- a/src/jit/block.h
+++ b/src/jit/block.h
@@ -30,17 +30,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "simplerhash.h"
/*****************************************************************************/
-
+typedef BitVec EXPSET_TP;
#if LARGE_EXPSET
-typedef unsigned __int64 EXPSET_TP;
#define EXPSET_SZ 64
#else
-typedef unsigned int EXPSET_TP;
#define EXPSET_SZ 32
#endif
-#define EXPSET_ALL ((EXPSET_TP)0 - 1)
-
typedef BitVec ASSERT_TP;
typedef BitVec_ValArg_T ASSERT_VALARG_TP;
typedef BitVec_ValRet_T ASSERT_VALRET_TP;
@@ -291,14 +287,14 @@ struct BasicBlock : private LIR::Range
}
}
+ unsigned __int64 bbFlags; // see BBF_xxxx below
+
unsigned bbNum; // the block's number
unsigned bbPostOrderNum; // the block's post order number in the graph.
unsigned bbRefs; // number of blocks that can reach here, either by fall-through or a branch. If this falls to zero,
// the block is unreachable.
- unsigned bbFlags; // see BBF_xxxx below
-
#define BBF_VISITED 0x00000001 // BB visited during optimizations
#define BBF_MARKED 0x00000002 // BB marked during optimizations
#define BBF_CHANGED 0x00000004 // input/output of this block has changed
@@ -357,6 +353,10 @@ struct BasicBlock : private LIR::Range
// BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
// finally.
+// Flags that relate blocks to loop structure.
+
+#define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1)
+
bool isRunRarely()
{
return ((bbFlags & BBF_RUN_RARELY) != 0);
@@ -860,9 +860,7 @@ struct BasicBlock : private LIR::Range
unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
-#ifdef DEBUGGING_SUPPORT
VARSET_TP bbScope; // variables in scope over the block
-#endif
void InitVarSets(class Compiler* comp);
@@ -1094,9 +1092,11 @@ public:
return AllSuccs(comp, this);
}
- // Clone block state and statements from 'from' block to 'to' block.
- // Assumes that "to" is an empty block.
- static void CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from);
+ // Try to clone block state and statements from `from` block to `to` block (which must be new/empty),
+ // optionally replacing uses of local `varNum` with IntCns `varVal`. Return true if all statements
+ // in the block are cloned successfully, false (with partially-populated `to` block) if one fails.
+ static bool CloneBlockState(
+ Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum = (unsigned)-1, int varVal = 0);
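+ // Illustrative caller sketch (hypothetical, for documentation only): the return value must be
+ // checked, because a failed clone leaves `to` partially populated.
+ //
+ //     if (!BasicBlock::CloneBlockState(comp, newBlk, fromBlk, lclNum, lclVal))
+ //     {
+ //         // abandon newBlk; gtCloneExpr hit a node it cannot clone
+ //     }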
void MakeLIR(GenTree* firstNode, GenTree* lastNode);
bool IsLIR();
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 0c4a311186..c6e38ab6af 100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -48,7 +48,6 @@ public:
unsigned* cnsPtr,
bool nogen = false);
-
private:
#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
// Bit masks used in negating a float or double number.
@@ -123,7 +122,7 @@ private:
void genRangeCheck(GenTree* node);
- void genLockedInstructions(GenTree* node);
+ void genLockedInstructions(GenTreeOp* node);
//-------------------------------------------------------------------------
// Register-related methods
@@ -251,6 +250,8 @@ protected:
void genAdjustSP(ssize_t delta);
+ void genAdjustStackLevel(BasicBlock* block);
+
void genExitCode(BasicBlock* block);
//-------------------------------------------------------------------------
@@ -488,15 +489,26 @@ protected:
void genAmd64EmitterUnitTests();
#endif
-//-------------------------------------------------------------------------
-//
-// End prolog/epilog generation
-//
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
+ //
+ // End prolog/epilog generation
+ //
+ //-------------------------------------------------------------------------
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************/
+ void genSinglePush();
+ void genSinglePop();
+ regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
+ void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debugging Support XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
#ifdef DEBUG
void genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping);
@@ -730,10 +742,6 @@ protected:
unsigned genTrnslLocalVarCount;
#endif
-/*****************************************************************************/
-#endif // DEBUGGING_SUPPORT
-/*****************************************************************************/
-
#ifndef LEGACY_BACKEND
#include "codegenlinear.h"
#else // LEGACY_BACKEND
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 4ce82307f9..73e51f2ef7 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -27,102 +27,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfoencoder.h"
#endif
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- regMaskTP regMask;
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet());
-
- if (varTypeIsMultiReg(tree))
- {
- assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
- assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
- regNumber regLo = genRegPairLo(tree->gtRegPair);
- regNumber regHi = genRegPairHi(tree->gtRegPair);
- inst_TT_RV(storeIns, tree, regLo);
- inst_TT_RV(storeIns, tree, regHi, 4);
- }
- else
- {
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum);
- }
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
/*****************************************************************************
*
* Generate code that will set the given register to the integer constant.
@@ -157,735 +61,22 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla
*/
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
- NYI("ARM genEmitGSCookieCheck is not yet implemented for protojit");
+ NYI("ARM genEmitGSCookieCheck");
}
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-void CodeGen::genCodeForBBlist()
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif
-
- // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
- if (compiler->opts.compDbgEnC)
- {
- noway_assert(isFramePointerUsed());
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- /* If we have any pinvoke calls, we might potentially trash everything */
- if (compiler->info.compCallUnmanaged)
- {
- noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
- regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
- }
-
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
- {
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- continue;
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- continue;
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- continue;
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
- }
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- /* Figure out which registers hold variables on entry to this block */
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
- }
- else if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
- {
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
-#if defined(_TARGET_ARM_)
- // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
- // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
- // calls the funclet during non-exceptional control flow.
- if (block->bbFlags & BBF_FINALLY_TARGET)
- {
- assert(block->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
- }
-#endif
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
- block->bbUnwindNopEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-
- instGen(INS_nop);
- }
-#endif // defined(_TARGET_ARM_)
-
- genUpdateCurrentFunclet(block);
-#endif // FEATURE_EH_FUNCLETS
-
-#ifdef _TARGET_XARCH_
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-#endif
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
-#endif
-
- block->bbEmitCookie = NULL;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
- /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
-#if !FEATURE_FIXED_OUT_ARGS
- /* Check for inserted throw blocks and adjust genStackLevel */
-
- if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
- {
- noway_assert(block->bbFlags & BBF_JMP_TARGET);
-
- genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
-
- if (genStackLevel)
- {
- NYI("Need emitMarkStackLvl()");
- }
- }
-#endif // !FEATURE_FIXED_OUT_ARGS
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
-#ifdef DEBUG
- bool pastProfileUpdate = false;
-#endif
-
-// Traverse the block in linear order, generating code for each node as we
-// as we encounter it.
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block))
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
-
- currentILOffset = node->gtStmt.gtStmtILoffsx;
-
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
-
-#ifdef DEBUG
- regSet.rsSpillChk();
-
- assert((node->gtFlags & GTF_SPILL) == 0);
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of a int function (which is legal)
- if (node->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
- (node->gtOp.gtOp1 != 0 && varTypeIsGC(node->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- // When profiling, the first few nodes in a catch block will be an update of
- // the profile count (does not interfere with the exception object).
- if (((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) != 0) && handlerGetsXcptnObj(block->bbCatchTyp))
- {
- pastProfileUpdate = pastProfileUpdate || node->OperGet() == GT_CATCH_ARG;
- if (!pastProfileUpdate)
- {
- nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
- }
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after node=");
- Compiler::printTreeID(node);
- printf(" BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == 0);
-#endif // DEBUG
- }
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == NULL);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == NULL);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
- }
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
-
- noway_assert(genStackLevel == 0);
-
-#ifdef _TARGET_AMD64_
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
- // here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
- {
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
- // Call immediately before the end of the code; we should never get here .
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
- }
-#endif //_TARGET_AMD64_
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
- // 2. If this is this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == NULL)
-#if FEATURE_EH_FUNCLETS
- || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
-#endif // FEATURE_EH_FUNCLETS
-#ifdef _TARGET_AMD64_
- || !BasicBlock::sameEHRegion(block, block->bbNext)
-#endif // _TARGET_AMD64_
- || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
- // Now set REG_LR to the address of where the finally funclet should
- // return to directly.
-
- BasicBlock* bbFinallyRet;
- bbFinallyRet = NULL;
-
- // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
- // we would have otherwise created retless calls.
- assert(block->isBBCallAlwaysPair());
-
- assert(block->bbNext != NULL);
- assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
- assert(block->bbNext->bbJumpDest != NULL);
- assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
-
- bbFinallyRet = block->bbNext->bbJumpDest;
- bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
-
-#if 0
- // TODO-ARM-CQ:
- // We don't know the address of finally funclet yet. But adr requires the offset
- // to finally funclet from current IP is within 4095 bytes. So this code is disabled
- // for now.
- getEmitter()->emitIns_J_R (INS_adr,
- EA_4BYTE,
- bbFinallyRet,
- REG_LR);
-#else // !0
- // Load the address where the finally funclet should return into LR.
- // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
- // the return.
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
-#endif // !0
-
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
- break;
-
-#ifdef _TARGET_ARM_
-
- case BBJ_EHCATCHRET:
- // set r0 to the address the VM should return to after the catch
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
- getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
-
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#elif defined(_TARGET_AMD64_)
-
- case BBJ_EHCATCHRET:
- // Set EAX to the address the VM should return to after the catch.
- // Generate a RIP-relative
- // lea reg, [rip + disp32] ; the RIP is implicit
- // which will be position-indepenent.
- // TODO-ARM-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC).
- getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
-#endif // _TARGET_AMD64_
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = 0;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
+ NYI("ARM genCallFinally");
+ return block;
}
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return NULL;
- }
+// move an immediate value into an integer register
- GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
- GenTreePtr op2 = tree->gtOp.gtOp2->gtEffectiveVal();
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
- }
- else
- {
- other = nullptr;
- return NULL;
- }
+void CodeGen::genEHCatchRet(BasicBlock* block)
+{
+ NYI("ARM genEHCatchRet");
}
-// move an immediate value into an integer register
-
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
// reg cannot be a FP register
@@ -902,16 +93,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm,
}
else
{
-#ifdef _TARGET_AMD64_
- if (AddrShouldUsePCRel(imm))
- {
- getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
- }
- else
-#endif // _TARGET_AMD64_
- {
- getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
- }
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
}
regTracker.rsTrackRegIntCns(reg, imm);
}
@@ -1423,6 +605,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
// Nothing to do
break;
@@ -1479,7 +662,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LOCKADD:
case GT_XCHG:
case GT_XADD:
- genLockedInstructions(treeNode);
+ genLockedInstructions(treeNode->AsOp());
break;
case GT_CMPXCHG:
@@ -1554,7 +737,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
#ifdef DEBUG
char message[256];
- sprintf(message, "NYI: Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n",
+ GenTree::NodeName(treeNode->OperGet()));
notYetImplemented(message, __FILE__, __LINE__);
#else
NYI("unimplemented node");
@@ -1566,7 +750,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// generate code for the locked operations:
// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
+void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
NYI("genLockedInstructions");
}
@@ -1697,188 +881,9 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
NYI("genCodeForShift");
}
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
-
- GenTree* unspillTree = tree;
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
- if (unspillTree->gtFlags & GTF_SPILLED)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- // Load local variable from its home location.
- inst_RV_TT(ins_Load(unspillTree->gtType), dstReg, unspillTree);
-
- unspillTree->SetInReg();
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
-
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- Compiler::printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- compiler->tmpRlsTemp(t);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg,
- t->tdTempNum(), 0);
-
- unspillTree->SetInReg();
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
- assert(tree->gtRegNum != REG_NA);
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
-
- if (varDsc->lvRegNum == tree->gtRegNum && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- else if (!varDsc->lvLRACandidate)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
- else
- {
- assert(!addr->isContained());
- genConsumeReg(addr);
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+void CodeGen::genRegCopy(GenTree* treeNode)
{
- if (addr->Base())
- genConsumeReg(addr->Base());
- if (addr->Index())
- genConsumeReg(addr->Index());
-}
-
-// do liveness update for register produced by the current node in codegen
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(tree->gtType), tree, tree->gtRegNum);
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- // Except in the case of a dead definition of a lclVar.
- if (tree->gtHasReg() && (!tree->IsLocal() || (tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- tree->SetInReg();
-}
-
-// transfer gc/byref status of src reg to dst reg
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
+ NYI("genRegCopy");
}
// Produce code for a GT_CALL node
@@ -2050,57 +1055,6 @@ void CodeGen::genEmitHelperCall(unsigned helper,
NYI("Helper call");
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
#endif // _TARGET_ARM_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index ca0df53a34..cc7c5dc524 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -747,7 +747,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* +=======================+ <---- Caller's SP
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
- * | PSP slot | // 8 bytes
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned.
* |-----------------------|
@@ -773,7 +773,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* +=======================+ <---- Caller's SP
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
- * | PSP slot | // 8 bytes
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the whole frame 16 byte aligned.
* |-----------------------|
@@ -801,7 +801,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* +=======================+ <---- Caller's SP
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
- * | PSP slot | // 8 bytes
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
* |-----------------------|
* ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
* |-----------------------|
@@ -883,7 +883,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* +=======================+ <---- Caller's SP
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
- * | PSP slot | // 8 bytes
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
* |-----------------------|
* | Saved FP, LR | // 16 bytes
* |-----------------------|
@@ -988,6 +988,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is the end of the OS-reported prolog for purposes of unwinding
compiler->unwindEndProlog();
+ // If there is no PSPSym (CoreRT ABI), we are done.
+ if (compiler->lvaPSPSym == BAD_VAR_NUM)
+ {
+ return;
+ }
+
if (isFilter)
{
// This is the first block of a filter
@@ -1134,8 +1140,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
assert((rsMaskSaveRegs & RBM_LR) != 0);
assert((rsMaskSaveRegs & RBM_FP) != 0);
+ unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
+
unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
- unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES;
+ unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
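+ // For example, with a PSPSym and four saved registers this is 4 * 8 + 8 = 40 bytes on ARM64;
+ // without a PSPSym (CoreRT ABI) the same case is 32 bytes.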
if (compiler->info.compIsVarArgs)
{
// For varargs we always save all of the integer register arguments
@@ -1222,22 +1230,29 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
- if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
- printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
- compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ if (CallerSP_to_PSP_slot_delta !=
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ {
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
}
}
-#endif // DEBUG
assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
- assert(compiler->lvaPSPSym != BAD_VAR_NUM);
- assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
- compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
- // funclet!
+
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+ }
+#endif // DEBUG
}
/*
@@ -1250,100 +1265,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- assert(varDsc->lvIsRegCandidate());
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- lclTyp = genActualType(lclTyp);
- emitAttr size = emitTypeSize(lclTyp);
-
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
-
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
-
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
/*****************************************************************************
*
* Generate code that will set the given register to the integer constant.
@@ -1405,702 +1326,79 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
genDefineTempLabel(gsCheckBlk);
}
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
-
-void CodeGen::genCodeForBBlist()
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif // DEBUG
-
- // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
+ // Generate a call to the finally, like this:
+ // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
+ // bl finally-funclet
+ // b finally-return // Only for non-retless finally calls
+ // The 'b' can be a NOP if we're going to the next block.
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
-
- if (compiler->opts.compScopeInfo)
- {
- siInit();
- }
-#endif
-
- // The current implementation of switch tables requires the first block to have a label so it
- // can generate offsets to the switch label targets.
- // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this.
- if (compiler->fgHasSwitch)
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
}
-
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ else
{
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- continue;
-
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
- continue;
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- continue;
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE);
}
+ getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ if (block->bbFlags & BBF_RETLESS_CALL)
{
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- /* Figure out which registers hold variables on entry to this block */
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
- VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
-#endif
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
-#ifdef DEBUG
- if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- else if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- if (!VarSetOps::IsEmpty(compiler, addedGCVars))
- {
- printf("\t\t\t\t\t\t\tAdded GCVars: ");
- dumpConvertedVarSet(compiler, addedGCVars);
- printf("\n");
- }
- if (!VarSetOps::IsEmpty(compiler, removedGCVars))
- {
- printf("\t\t\t\t\t\t\tRemoved GCVars: ");
- dumpConvertedVarSet(compiler, removedGCVars);
- printf("\n");
- }
- }
-#endif // DEBUG
-
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
-
- genUpdateCurrentFunclet(block);
-
-#ifdef _TARGET_XARCH_
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-#endif
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
-#endif
-
- block->bbEmitCookie = NULL;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, FALSE);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
- !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
- // emit a NO_MAPPING entry, immediately after the prolog.
- {
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
- }
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
-
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
- // Traverse the block in linear order, generating code for each node as we
- // encounter it.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
- currentILOffset = node->gtStmt.gtStmtILoffsx;
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
- } // end for each node in block
-
-#ifdef DEBUG
- // The following set of register spill checks and GC pointer tracking checks used to be
- // performed at statement boundaries. Now, with LIR, there are no statements, so they are
- // performed at the end of each block.
- // TODO: could these checks be performed more frequently? E.g., at each location where
- // the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
-
- regSet.rsSpillChk();
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of an int function (which is legal)
- GenTree* blockLastNode = block->lastNode();
- if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
- (varTypeIsGC(compiler->info.compRetType) ||
- (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == RBM_NONE);
-#endif // DEBUG
-
-#if defined(DEBUG) && defined(_TARGET_ARM64_)
- if (block->bbNext == nullptr)
- {
- // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block
- // (it's as good as any, but better than the prolog, which can only be a single instruction
- // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
- // thinks the instructions are the same as we do.
- genArm64EmitterUnitTests();
- }
-#endif // defined(DEBUG) && defined(_TARGET_ARM64_)
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
-
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
- {
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
- bool isLastBlockProcessed = (block->bbNext == NULL);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == NULL);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
- }
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
{
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
+ instGen(INS_BREAKPOINT); // This should never get executed
}
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
-
- noway_assert(genStackLevel == 0);
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (cannot be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
-#if 0
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
{
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) ||
- !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
- // Call immediately before the end of the code; we should never get here.
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
+ // Fall-through.
+ // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
}
-#endif // 0
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
+ else
{
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
- // 2. If this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
- !BasicBlock::sameEHRegion(block, block->bbNext) ||
- (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
- // Generate a call to the finally, like this:
- // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym
- // bl finally-funclet
- // b finally-return // Only for non-retless finally calls
- // The 'b' can be a NOP if we're going to the next block.
-
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
- getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
-
- if (block->bbFlags & BBF_RETLESS_CALL)
- {
- // We have a retless call, and the last instruction generated was a call.
- // If the next block is in a different EH region (or is the end of the code
- // block), then we need to generate a breakpoint here (since it will never
- // get executed) to get proper unwind behavior.
-
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- }
- else
- {
- // Because of the way the flowgraph is connected, the liveness info for this one instruction
- // after the call is not (can not be) correct in cases where a variable has a last use in the
- // handler. So turn off GC reporting for this single instruction.
- getEmitter()->emitDisableGC();
-
- // Now go to where the finally funclet needs to return to.
- if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
- {
- // Fall-through.
- // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
- // to the next instruction? This would depend on stack walking from within the finally
- // handler working without this instruction being in this special EH region.
- instGen(INS_nop);
- }
- else
- {
- inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
- }
-
- getEmitter()->emitEnableGC();
- }
-
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
- break;
-
- case BBJ_EHCATCHRET:
- // For long address (default): `adrp + add` will be emitted.
- // For short address (proven later): `adr` will be emitted.
- getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
-
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
}
-#ifdef DEBUG
- compiler->compCurBB = 0;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
+ getEmitter()->emitEnableGC();
+ }
-#ifdef DEBUG
- if (compiler->verbose)
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
{
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
- compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
}
-#endif
+ return block;
}
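// A minimal sketch, assuming the block loop now lives in the shared genCodeForBBlist, of how a
// caller is expected to use the returned block so the paired BBJ_ALWAYS is skipped for
// non-retless calls. The helper name and loop shape below are illustrative only, not this patch:
void CodeGen::genIterateBlocksSketch()
{
    BasicBlock* lblk = nullptr;
    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
    {
        // ... generate code for the nodes in 'block' ...
        if (block->bbJumpKind == BBJ_CALLFINALLY)
        {
            // genCallFinally may advance 'block' past the paired BBJ_ALWAYS block.
            block = genCallFinally(block, lblk);
        }
    }
}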
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-// TODO-Cleanup: move to CodeGenCommon.cpp
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+void CodeGen::genEHCatchRet(BasicBlock* block)
{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return NULL;
- }
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
- }
- else
- {
- other = nullptr;
- return NULL;
- }
+ // For long address (default): `adrp + add` will be emitted.
+ // For short address (proven later): `adr` will be emitted.
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
}
// move an immediate value into an integer register
@@ -3397,12 +2695,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
// Nothing to do
break;
case GT_PUTARG_STK:
- genPutArgStk(treeNode);
+ genPutArgStk(treeNode->AsPutArgStk());
break;
case GT_PUTARG_REG:
@@ -3432,7 +2731,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LOCKADD:
case GT_XCHG:
case GT_XADD:
- genLockedInstructions(treeNode);
+ genLockedInstructions(treeNode->AsOp());
break;
case GT_MEMORYBARRIER:
@@ -3597,7 +2896,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
#ifdef DEBUG
char message[256];
- sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n",
+ GenTree::NodeName(treeNode->OperGet()));
#endif
assert(!"Unknown node in codegen");
}
@@ -3998,9 +3298,11 @@ BAILOUT:
if (endLabel != nullptr)
genDefineTempLabel(endLabel);
- // Write the lvaShadowSPfirst stack frame slot
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
+ // Write the lvaLocAllocSPvar stack frame slot
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
+ }
#if STACK_PROBES
if (compiler->opts.compNeedStackProbes)
@@ -4034,6 +3336,10 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
assert(!initVal->isContained());
@@ -4043,8 +3349,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
emitter *emit = getEmitter();
- genConsumeReg(initVal);
- genConsumeReg(dstAddr);
+ genConsumeOperands(initBlkNode);
// If the initVal was moved, or spilled and reloaded to a different register,
// get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
@@ -4066,27 +3371,25 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
assert(!initVal->isContained());
assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
- if (size == 0)
- {
- noway_assert(initBlkNode->gtOper == GT_DYN_BLK);
- genConsumeRegAndCopy(initBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
- }
- else
- {
// TODO-ARM64-CQ: When initblk loop unrolling is implemented
// put this assert back on.
#if 0
- assert(size >= INITBLK_UNROLL_LIMIT);
-#endif // 0
- genSetRegToIcon(REG_ARG_2, size);
+ if (size != 0)
+ {
+ assert(blockSize >= INITBLK_UNROLL_LIMIT);
}
- genConsumeRegAndCopy(initVal, REG_ARG_1);
- genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+#endif // 0
+
+ genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
}
@@ -4238,29 +3541,38 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
// str tempReg, [R14, #8]
void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
- // Make sure we got the arguments of the cpobj operation in the right registers
- GenTreePtr dstAddr = cpObjNode->Addr();
- GenTreePtr source = cpObjNode->Data();
- noway_assert(source->gtOper == GT_IND);
- GenTreePtr srcAddr = source->gtGetOp1();
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ var_types srcAddrType = TYP_BYREF;
+ bool sourceIsLocal = false;
+
+ assert(source->isContained());
+ if (source->gtOper == GT_IND)
+ {
+ GenTree* srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ srcAddrType = srcAddr->TypeGet();
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ sourceIsLocal = true;
+ }
bool dstOnStack = dstAddr->OperIsLocalAddr();
#ifdef DEBUG
assert(!dstAddr->isContained());
- assert(!srcAddr->isContained());
// This GenTree node has data about GC pointers, this means we're dealing
// with CpObj.
assert(cpObjNode->gtGcPtrCount > 0);
#endif // DEBUG
- // Consume these registers.
+ // Consume the operands and get them into the right registers.
// They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF);
- gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet());
-
- genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF);
+ genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
// Temp register used to perform the sequence of loads and stores.
@@ -4332,31 +3644,17 @@ void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
// Make sure we got the arguments of the cpblk operation in the right registers
unsigned blockSize = cpBlkNode->Size();
GenTreePtr dstAddr = cpBlkNode->Addr();
- GenTreePtr source = cpBlkNode->Data();
- noway_assert(source->gtOper == GT_IND);
- GenTreePtr srcAddr = source->gtGetOp1();
-
assert(!dstAddr->isContained());
- assert(!srcAddr->isContained());
- assert(cpBlkNode->gtRsvdRegs == RBM_ARG_2);
- if (blockSize != 0)
- {
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
#if 0
// Enable this when we support cpblk loop unrolling.
-
- assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
-
-#endif // 0
- genSetRegToIcon(REG_ARG_2, blockSize);
- }
- else
+ if (blockSize != 0)
{
- noway_assert(cpBlkNode->gtOper == GT_DYN_BLK);
- genConsumeRegAndCopy(cpBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
+ assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
}
- genConsumeRegAndCopy(srcAddr, REG_ARG_1);
- genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+#endif // 0
genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
}
@@ -4421,7 +3719,7 @@ void CodeGen::genJumpTable(GenTree* treeNode)
// generate code for the locked operations:
// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
+void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
#if 0
GenTree* data = treeNode->gtOp.gtOp2;
@@ -4839,154 +4137,6 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
genProduceReg(tree);
}
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
-
- GenTree* unspillTree = tree;
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
-
- if (unspillTree->gtFlags & GTF_SPILLED)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- var_types targetType = unspillTree->gtType;
- instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
- emitAttr attr = emitTypeSize(targetType);
- emitter* emit = getEmitter();
-
- // Fixes Issue #3326
- attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
-
- // Load local variable from its home location.
- inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
-
- unspillTree->SetInReg();
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // This code exists in other CodeGen*.cpp files.
-
- // Don't update the variable's location if we are just re-spilling it again.
-
- if ((unspillTree->gtFlags & GTF_SPILL) == 0)
- {
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- compiler->printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- else if (unspillTree->IsMultiRegCall())
- {
- GenTreeCall* call = unspillTree->AsCall();
- ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = pRetTypeDesc->GetReturnRegCount();
- GenTreeCopyOrReload* reloadTree = nullptr;
- if (tree->OperGet() == GT_RELOAD)
- {
- reloadTree = tree->AsCopyOrReload();
- }
-
- // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
- // one or more of its result regs are spilled. Call node needs to be
- // queried to know which specific result regs to be unspilled.
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILLED) != 0)
- {
- var_types dstType = pRetTypeDesc->GetReturnRegType(i);
- regNumber unspillTreeReg = call->GetRegNumByIdx(i);
-
- if (reloadTree != nullptr)
- {
- dstReg = reloadTree->GetRegNumByIdx(i);
- if (dstReg == REG_NA)
- {
- dstReg = unspillTreeReg;
- }
- }
- else
- {
- dstReg = unspillTreeReg;
- }
-
- TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
- getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
- 0);
- compiler->tmpRlsTemp(t);
- gcInfo.gcMarkRegPtrVal(dstReg, dstType);
- }
- }
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
- t->tdTempNum(), 0);
- compiler->tmpRlsTemp(t);
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- }
-}
-
-// Do a liveness update for a subnode that is being consumed by codegen,
-// including the logic for reload in case it is needed, and also take care
-// of locating the value in the desired register.
-void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
-{
- regNumber treeReg = genConsumeReg(tree);
- if (treeReg != needReg)
- {
- var_types targetType = tree->TypeGet();
- inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType);
- }
-}
-
void CodeGen::genRegCopy(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_COPY);
@@ -5049,261 +4199,6 @@ void CodeGen::genRegCopy(GenTree* treeNode)
genProduceReg(treeNode);
}
-// Do liveness update for a subnode that is being consumed by codegen.
-// TODO-Cleanup: move to CodeGenCommon.cpp
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- if (tree->OperGet() == GT_COPY)
- {
- genRegCopy(tree);
- }
- // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
- // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
- // always using GT_COPY to make the lclVar location explicit.
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
- // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
- // the lclVar (normally when a lclVar is spilled it is then used from its former register
- // location, which matches the gtRegNum on the node).
- // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
- // because if it's on the stack it will always get reloaded into tree->gtRegNum).
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum))
- {
- inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
- }
- }
-
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
- assert(tree->gtRegNum != REG_NA);
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- assert(varDsc->lvLRACandidate);
-
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
- else if (!addr->isContained())
- {
- genConsumeReg(addr);
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
-{
- if (addr->Base())
- genConsumeReg(addr->Base());
- if (addr->Index())
- genConsumeReg(addr->Index());
-}
-
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genConsumeRegs(GenTree* tree)
-{
- if (tree->isContained())
- {
- if (tree->isIndir())
- {
- genConsumeAddress(tree->AsIndir()->Addr());
- }
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::LowerCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
- else
- {
- assert(tree->OperIsLeaf());
- }
- }
- else
- {
- genConsumeReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
-//
-// Arguments:
-// tree - the GenTreeOp whose operands will have their liveness updated.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Note that this logic is localized here because we must do the liveness update in
-// the correct execution order. This is important because we may have two operands
-// that involve the same lclVar, and if one is marked "lastUse" we must handle it
-// after the first.
-// TODO-Cleanup: move to CodeGenCommon.cpp
-
-void CodeGen::genConsumeOperands(GenTreeOp* tree)
-{
- GenTree* firstOp = tree->gtOp1;
- GenTree* secondOp = tree->gtOp2;
- if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
- {
- assert(secondOp != nullptr);
- firstOp = secondOp;
- secondOp = tree->gtOp1;
- }
- if (firstOp != nullptr)
- {
- genConsumeRegs(firstOp);
- }
- if (secondOp != nullptr)
- {
- genConsumeRegs(secondOp);
- }
-}
-
-// do liveness update for register produced by the current node in codegen
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree,
- tree->gtRegNum);
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- if (tree->gtHasReg())
- {
- // We only mark the register in the following cases:
- // 1. It is not a register candidate local. In this case, we're producing a
- // register from a local, but the local is not a register candidate. Thus,
- // we must be loading it as a temp register, and any "last use" flag on
- // the register wouldn't be relevant.
- // 2. The register candidate local is going dead. There's no point to mark
- // the register as live, with a GC pointer, if the variable is dead.
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- }
- tree->SetInReg();
-}
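// A minimal sketch of the per-node pattern these helpers imply, assuming a simple two-operand
// node with a register target. The helper name and emitter call are illustrative, not part of
// this change:
void CodeGen::genCodeForBinarySketch(GenTreeOp* treeNode, instruction ins)
{
    genConsumeOperands(treeNode); // liveness update for both operands, in execution order
    getEmitter()->emitIns_R_R_R(ins, emitActualTypeSize(treeNode->TypeGet()), treeNode->gtRegNum,
                                treeNode->gtGetOp1()->gtRegNum, treeNode->gtGetOp2()->gtRegNum);
    genProduceReg(treeNode); // mark the destination register live (and GC-tracked if needed)
}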
-
-// transfer gc/byref status of src reg to dst reg
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
-}
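// A minimal usage sketch, assuming a raw register-to-register move as in genRegCopy: the
// GC/byref status must follow the value to its new register. The helper name is hypothetical:
void CodeGen::genCopyRegSketch(var_types type, regNumber dstReg, regNumber srcReg)
{
    inst_RV_RV(ins_Copy(type), dstReg, srcReg, type); // emit the move
    genTransferRegGCState(dstReg, srcReg);            // dst inherits the gcref/byref/none status of src
}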
-
-// generates an ip-relative call or indirect call via reg ('call reg')
-// pass in 'addr' for a relative call or 'base' for an indirect register call
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr,
- emitAttr retSize,
- emitAttr secondRetSize,
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
-{
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, 0,
- retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
- emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
-}
-
-// generates an indirect call via addressing mode (call []) given an indir node
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir,
- emitAttr retSize,
- emitAttr secondRetSize,
- IL_OFFSETX ilOffset)
-{
- genConsumeAddress(indir->Addr());
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, 0,
- retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, ilOffset, indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
-}
-
// Produce code for a GT_CALL node
void CodeGen::genCallInstruction(GenTreePtr node)
{
@@ -5321,7 +4216,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
// Consume all the arg regs
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
@@ -5332,7 +4227,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
continue;
// Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_LIST)
+ if (argNode->OperGet() == GT_FIELD_LIST)
{
GenTreeArgList* argListPtr = argNode->AsArgList();
unsigned iterationNum = 0;
@@ -5457,7 +4352,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
}
-#ifdef DEBUGGING_SUPPORT
// We need to propagate the IL offset information to the call instruction, so we can emit
// an IL to native mapping record for the call, to support managed return value debugging.
// We don't want tail call helper calls that were converted from normal calls to get a record,
@@ -5466,7 +4360,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
{
(void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
}
-#endif // DEBUGGING_SUPPORT
if (target != nullptr)
{
@@ -6703,7 +5596,7 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
// Return value:
// None
//
-void CodeGen::genPutArgStk(GenTreePtr treeNode)
+void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
assert(treeNode->OperGet() == GT_PUTARG_STK);
var_types targetType = treeNode->TypeGet();
@@ -6759,7 +5652,7 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
varNumOut = compiler->lvaOutgoingArgSpaceVar;
argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
}
- bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_LIST);
+ bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
if (!isStruct) // a normal non-Struct argument
{
@@ -6785,24 +5678,24 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
{
assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64
- if (source->OperGet() == GT_LIST)
+ if (source->OperGet() == GT_FIELD_LIST)
{
// Deal with the multi register passed struct args.
- GenTreeArgList* argListPtr = source->AsArgList();
+ GenTreeFieldList* fieldListPtr = source->AsFieldList();
- // Evaluate each of the GT_LIST items into their register
+ // Evaluate each of the GT_FIELD_LIST items into their register
// and store their register into the outgoing argument area
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest())
+ for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
{
- GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1;
+ GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1;
genConsumeReg(nextArgNode);
regNumber reg = nextArgNode->gtRegNum;
var_types type = nextArgNode->TypeGet();
emitAttr attr = emitTypeSize(type);
- // Emit store instructions to store the registers produced by the GT_LIST into the outgoing argument
- // area
+ // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
+ // argument area
emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
argOffsetOut += EA_SIZE_IN_BYTES(attr);
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
@@ -7159,7 +6052,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// Now we can actually use those slot ID's to declare live ranges.
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-#if defined(DEBUGGING_SUPPORT)
if (compiler->opts.compDbgEnC)
{
// what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
@@ -7183,7 +6075,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// frame
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
}
-#endif
gcInfoEncoder->Build();
@@ -7249,58 +6140,6 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
regTracker.rsTrashRegsForGCInterruptability();
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-// TODO-Cleanup: move to CodeGenCommon.cpp
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
/*****************************************************************************
* Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h
index 81b7b34194..3a88c83915 100644
--- a/src/jit/codegenclassic.h
+++ b/src/jit/codegenclassic.h
@@ -63,10 +63,6 @@ void genPInvokeCallEpilog(LclVarDsc* varDsc, regMaskTP retVal);
regNumber genLclHeap(GenTreePtr size);
-void genSinglePush();
-
-void genSinglePop();
-
void genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet);
bool genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum);
@@ -287,9 +283,6 @@ void genCodeForJumpTable(GenTreePtr tree);
void genCodeForSwitchTable(GenTreePtr tree);
void genCodeForSwitch(GenTreePtr tree);
-regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
-void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
-
size_t genPushArgList(GenTreePtr call);
#ifdef _TARGET_ARM_
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 2710447ade..240911523f 100755..100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -103,6 +103,10 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
u8ToDblBitmask = nullptr;
#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(_TARGET_X86_)
+ m_stkArgVarNum = BAD_VAR_NUM;
+#endif
+
regTracker.rsTrackInit(compiler, &regSet);
gcInfo.regSet = &regSet;
m_cgEmitter = new (compiler->getAllocator()) emitter();
@@ -163,12 +167,10 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
genFlagsEqualToNone();
#endif // LEGACY_BACKEND
-#ifdef DEBUGGING_SUPPORT
// Initialize the IP-mapping logic.
compiler->genIPmappingList = nullptr;
compiler->genIPmappingLast = nullptr;
compiler->genCallSite2ILOffsetMap = nullptr;
-#endif
/* Assume that we not fully interruptible */
@@ -359,7 +361,7 @@ void CodeGen::genPrepForCompiler()
{
VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
}
- else if (compiler->lvaIsGCTracked(varDsc) && (!varDsc->lvIsParam || varDsc->lvIsRegArg))
+ else if (compiler->lvaIsGCTracked(varDsc))
{
VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
}
@@ -646,23 +648,32 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
#elif defined(_TARGET_ARM64_)
return RBM_CALLEE_TRASH_NOGC;
+#elif defined(_TARGET_X86_)
+ return RBM_ESI | RBM_EDI | RBM_ECX;
#else
NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
return RBM_CALLEE_TRASH;
#endif
case CORINFO_HELP_PROF_FCN_ENTER:
-#ifdef _TARGET_AMD64_
+#ifdef RBM_PROFILER_ENTER_TRASH
return RBM_PROFILER_ENTER_TRASH;
#else
- unreached();
+ NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch");
#endif
+
case CORINFO_HELP_PROF_FCN_LEAVE:
- case CORINFO_HELP_PROF_FCN_TAILCALL:
-#ifdef _TARGET_AMD64_
+#ifdef RBM_PROFILER_LEAVE_TRASH
return RBM_PROFILER_LEAVE_TRASH;
#else
- unreached();
+ NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch");
+#endif
+
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#ifdef RBM_PROFILER_TAILCALL_TRASH
+ return RBM_PROFILER_TAILCALL_TRASH;
+#else
+ NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch");
#endif
case CORINFO_HELP_STOP_FOR_GC:
@@ -685,26 +696,34 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
{
assert(emitter::emitNoGChelper(helper));
-#ifdef _TARGET_AMD64_
+
switch (helper)
{
+#if defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
case CORINFO_HELP_PROF_FCN_ENTER:
return RBM_PROFILER_ENTER_TRASH;
case CORINFO_HELP_PROF_FCN_LEAVE:
- case CORINFO_HELP_PROF_FCN_TAILCALL:
return RBM_PROFILER_LEAVE_TRASH;
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+ return RBM_PROFILER_TAILCALL_TRASH;
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
+
case CORINFO_HELP_ASSIGN_BYREF:
+#if defined(_TARGET_AMD64_)
// this helper doesn't trash RSI and RDI
return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI);
+#elif defined(_TARGET_X86_)
+ // This helper only trashes ECX.
+ return RBM_ECX;
+#else
+ return RBM_CALLEE_TRASH_NOGC;
+#endif // defined(_TARGET_AMD64_)
default:
return RBM_CALLEE_TRASH_NOGC;
}
-#else
- return RBM_CALLEE_TRASH_NOGC;
-#endif
}
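// The RBM_PROFILER_*_TRASH guards above assume each target's target.h supplies these masks when
// the profiler helpers are supported. A sketch of what such definitions are assumed to look
// like; the values are placeholders, not the real masks for any architecture:
#if 0
#define RBM_PROFILER_ENTER_TRASH    RBM_CALLEE_TRASH
#define RBM_PROFILER_LEAVE_TRASH    RBM_CALLEE_TRASH
#define RBM_PROFILER_TAILCALL_TRASH RBM_CALLEE_TRASH
#endif // 0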
// Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e.,
@@ -1004,9 +1023,7 @@ void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
#endif // LEGACY_BACKEND
-#ifdef DEBUGGING_SUPPORT
codeGen->siUpdate();
-#endif
}
}
@@ -1172,9 +1189,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree)
#endif // !LEGACY_BACKEND
}
-#ifdef DEBUGGING_SUPPORT
codeGen->siUpdate();
-#endif
}
// Need an explicit instantiation.
@@ -1626,6 +1641,44 @@ void CodeGen::genAdjustSP(ssize_t delta)
inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
}
+//------------------------------------------------------------------------
+// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block
+//
+// Arguments:
+// block - The BasicBlock for which we are about to generate code.
+//
+// Assumptions:
+// Must be called just prior to generating code for 'block'.
+//
+// Notes:
+// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
+// and if 'block' is a throw helper block with a non-zero stack level.
+
+void CodeGen::genAdjustStackLevel(BasicBlock* block)
+{
+#if !FEATURE_FIXED_OUT_ARGS
+ // Check for inserted throw blocks and adjust genStackLevel.
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel != 0)
+ {
+#ifdef _TARGET_X86_
+ getEmitter()->emitMarkStackLvl(genStackLevel);
+ inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
+ genStackLevel = 0;
+#else // _TARGET_X86_
+ NYI("Need emitMarkStackLvl()");
+#endif // _TARGET_X86_
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
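+// A minimal sketch of the assumed call placement (hypothetical driver routine), per the
+// "must be called just prior to generating code for 'block'" note above:
+//
+//   void CodeGen::genBeginBlockSketch(BasicBlock* block)
+//   {
+//       // Per-block setup happens first (liveness, GC sets, labels, ...), then:
+//       genAdjustStackLevel(block); // adjust genStackLevel if 'block' is an inserted throw helper block
+//       // ... and only after this do we generate code for the nodes in 'block'.
+//   }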
#ifdef _TARGET_ARM_
// return size
// alignmentWB is out param
@@ -2539,14 +2592,12 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
void CodeGen::genExitCode(BasicBlock* block)
{
-#ifdef DEBUGGING_SUPPORT
/* Just wrote the first instruction of the epilog - inform debugger
Note that this may result in a duplicate IPmapping entry, and
that this is ok */
// For non-optimized debuggable code, there is only one epilog.
genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
-#endif // DEBUGGING_SUPPORT
bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
if (compiler->getNeedsGSSecurityCookie())
@@ -2968,7 +3019,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
#if defined(DEBUG)
,
(compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
- !(compiler->opts.eeFlags & CORJIT_FLG_PREJIT)
+ !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)
#endif
#ifdef LEGACY_BACKEND
,
@@ -3095,7 +3146,8 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
noway_assert(getEmitter()->emitMaxStackDepth <=
- (compiler->fgPtrArgCntMax + compiler->compHndBBtabCount + // Return address for locally-called finallys
+ (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
+ compiler->compHndBBtabCount + // Return address for locally-called finallys
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
#endif
@@ -3116,8 +3168,6 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
compiler->unwindEmit(*codePtr, coldCodePtr);
-#ifdef DEBUGGING_SUPPORT
-
/* Finalize the line # tracking logic after we know the exact block sizes/offsets */
genIPmappingGen();
@@ -3126,8 +3176,6 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
genSetScopeInfo();
-#endif // DEBUGGING_SUPPORT
-
#ifdef LATE_DISASM
unsigned finalHotCodeSize;
unsigned finalColdCodeSize;
@@ -3272,6 +3320,8 @@ void CodeGen::genReportEH()
EHblkDsc* HBtab;
EHblkDsc* HBtabEnd;
+ bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI);
+
unsigned EHCount = compiler->compHndBBtabCount;
#if FEATURE_EH_FUNCLETS
@@ -3279,46 +3329,55 @@ void CodeGen::genReportEH()
// VM.
unsigned duplicateClauseCount = 0;
unsigned enclosingTryIndex;
- for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
+
+ // Duplicate clauses are not used by CoreRT ABI
+ if (!isCoreRTABI)
{
- for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
- // ignoring 'mutual protect' trys
- enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
- enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
{
- ++duplicateClauseCount;
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ ++duplicateClauseCount;
+ }
}
+ EHCount += duplicateClauseCount;
}
- EHCount += duplicateClauseCount;
#if FEATURE_EH_CALLFINALLY_THUNKS
unsigned clonedFinallyCount = 0;
- // We don't keep track of how many cloned finally there are. So, go through and count.
- // We do a quick pass first through the EH table to see if there are any try/finally
- // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
-
- bool anyFinallys = false;
- for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
- HBtab < HBtabEnd; HBtab++)
+ // Duplicate clauses are not used by CoreRT ABI
+ if (!isCoreRTABI)
{
- if (HBtab->HasFinallyHandler())
+        // We don't keep track of how many cloned finallys there are. So, go through and count.
+ // We do a quick pass first through the EH table to see if there are any try/finally
+ // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
+
+ bool anyFinallys = false;
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
{
- anyFinallys = true;
- break;
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ break;
+ }
}
- }
- if (anyFinallys)
- {
- for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ if (anyFinallys)
{
- if (block->bbJumpKind == BBJ_CALLFINALLY)
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
- ++clonedFinallyCount;
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ ++clonedFinallyCount;
+ }
}
- }
- EHCount += clonedFinallyCount;
+ EHCount += clonedFinallyCount;
+ }
}
#endif // FEATURE_EH_CALLFINALLY_THUNKS
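
To illustrate the counting above with hypothetical numbers (not taken from this change): a method with three EH table entries, two of which must be re-reported for an enclosing try, plus one cloned finally, would report

    unsigned EHCount = compiler->compHndBBtabCount; // 3 entries in the EH table
    EHCount += duplicateClauseCount;                // + 2 duplicate clauses for enclosing trys
    EHCount += clonedFinallyCount;                  // + 1 cloned finally (BBJ_CALLFINALLY)
    // EHCount == 6 on non-CoreRT targets; under the CoreRT ABI both additions are
    // skipped and only the original 3 clauses are reported.
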
@@ -3373,6 +3432,23 @@ void CodeGen::genReportEH()
CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
+ if (isCoreRTABI && (XTnum > 0))
+ {
+            // For CoreRT, the CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers the
+            // same try block as the previous one. The runtime cannot reliably infer this information
+            // from native code offsets, because different try blocks can have the same offsets. An
+            // alternative solution to this problem would be to insert extra nops to ensure that
+            // different try blocks have different offsets.
+ if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1))
+ {
+ // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is
+ // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in
+ // IL as "try { try {} catch {} catch {} } finally {}".
+ assert(HBtab->HasCatchHandler());
+ flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY);
+ }
+ }
+
// Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
// the fields aren't accurate.
@@ -3578,9 +3654,7 @@ void CodeGen::genReportEH()
CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
// Tell the VM this is an extra clause caused by moving funclets out of line.
- // It seems weird this is from the CorExceptionFlag enum in corhdr.h,
- // not the CORINFO_EH_CLAUSE_FLAGS enum in corinfo.h.
- flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+ flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE);
// Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
// the fields aren't really accurate. For example, we set "TryLength" to the offset of the
@@ -3617,7 +3691,7 @@ void CodeGen::genReportEH()
} // if (duplicateClauseCount > 0)
#if FEATURE_EH_CALLFINALLY_THUNKS
- if (anyFinallys)
+ if (clonedFinallyCount > 0)
{
unsigned reportedClonedFinallyCount = 0;
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
@@ -3647,9 +3721,9 @@ void CodeGen::genReportEH()
CORINFO_EH_CLAUSE clause;
clause.ClassToken = 0; // unused
- clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
- clause.TryOffset = hndBeg;
- clause.TryLength = hndBeg;
+ clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE);
+ clause.TryOffset = hndBeg;
+ clause.TryLength = hndBeg;
clause.HandlerOffset = hndBeg;
clause.HandlerLength = hndEnd;
@@ -3671,7 +3745,7 @@ void CodeGen::genReportEH()
} // for each block
assert(clonedFinallyCount == reportedClonedFinallyCount);
- } // if (anyFinallys)
+ } // if (clonedFinallyCount > 0)
#endif // FEATURE_EH_CALLFINALLY_THUNKS
#endif // FEATURE_EH_FUNCLETS
@@ -6995,12 +7069,12 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
compiler->info.compInitMem || compiler->opts.compDbgCode);
-#ifdef _TARGET_64BIT_
+#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
{
continue;
}
-#else // !_TARGET_64BIT_
+#else // LEGACY_BACKEND
if (varDsc->lvRegister)
{
if (varDsc->lvOnFrame)
@@ -7016,7 +7090,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
}
continue;
}
-#endif // !_TARGET_64BIT_
+#endif // LEGACY_BACKEND
if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
(varDsc->lvExactSize >= TARGET_POINTER_SIZE))
@@ -7221,11 +7295,31 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
#ifdef PROFILING_SUPPORTED
-/*-----------------------------------------------------------------------------
- *
- * Generate the profiling function enter callback.
- */
-
+//-----------------------------------------------------------------------------------
+// genProfilingEnterCallback: Generate the profiling function enter callback.
+//
+// Arguments:
+// initReg - register to use as scratch register
+// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is
+// not zero after this call.
+//
+// Return Value:
+// None
+//
+// Notes:
+// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in
+// VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+// push FunctionIDOrClientID
+// call ProfileEnterHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+// the following prolog is assumed:
+// push ESP
+// mov EBP, ESP
+// 4. All registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
{
assert(compiler->compGeneratingProlog);
@@ -7236,7 +7330,6 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
return;
}
-#ifndef LEGACY_BACKEND
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
unsigned varNum;
LclVarDsc* varDsc;
@@ -7280,7 +7373,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
else
{
// No need to record relocations, if we are generating ELT hooks under the influence
- // of complus_JitELtHookEnabled=1
+ // of COMPlus_JitELTHookEnabled=1
if (compiler->opts.compJitELTHookEnabled)
{
genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
@@ -7346,11 +7439,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
*pInitRegZeroed = false;
}
-#else //!_TARGET_AMD64_
- NYI("RyuJIT: Emit Profiler Enter callback");
-#endif
-
-#else // LEGACY_BACKEND
+#elif defined(_TARGET_X86_) || (defined(_TARGET_ARM_) && defined(LEGACY_BACKEND))
unsigned saveStackLvl2 = genStackLevel;
@@ -7423,17 +7512,41 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
/* Restore the stack level */
genStackLevel = saveStackLvl2;
-#endif // LEGACY_BACKEND
-}
-/*****************************************************************************
- *
- * Generates Leave profiler hook.
- * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
- */
+#else // target
+ NYI("Emit Profiler Enter callback");
+#endif // target
+}
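
For the x86 requirements listed in the header comment above, the enter hook boils down to pushing the profiler handle and calling the enter helper, which pops its own argument. A minimal sketch (mirroring the x86 leave path below, and ignoring the indirected-handle case) would be:

    inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd); // push FunctionIDOrClientID
    genSinglePush();                                          // track the stack level
    genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
                      sizeof(int) * 1, // argSize: the helper pops its argument
                      EA_UNKNOWN);     // retSize
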
+//-----------------------------------------------------------------------------------
+// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback.
+// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+//
+// Arguments:
+// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL
+//
+// Return Value:
+// None
+//
+// Notes:
+// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and
+// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details):
+// 1. The calling sequence for calling the helper is:
+// push FunctionIDOrClientID
+// call ProfileLeaveHelper or ProfileTailcallHelper
+// 2. The calling function has an EBP frame.
+// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus,
+// the following prolog is assumed:
+// push ESP
+// mov EBP, ESP
+// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved.
+// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved.
+// 5. The helper pops the FunctionIDOrClientID argument from the stack.
+//
void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
{
+ assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL));
+
// Only hook if profiler says it's okay.
if (!compiler->compIsProfilerHookNeeded())
{
@@ -7442,12 +7555,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
compiler->info.compProfilerCallback = true;
- // Need to save on to the stack level, since the callee will pop the argument
+    // Need to save the current stack level, since the helper call will pop the argument
unsigned saveStackLvl2 = genStackLevel;
-#ifndef LEGACY_BACKEND
-
#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+
// Since the method needs to make a profiler callback, it should have out-going arg space allocated.
noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
@@ -7477,7 +7589,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
else
{
// Don't record relocations, if we are generating ELT hooks under the influence
- // of complus_JitELtHookEnabled=1
+ // of COMPlus_JitELTHookEnabled=1
if (compiler->opts.compJitELTHookEnabled)
{
genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
@@ -7517,13 +7629,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
// "mov r8, helper addr; call r8"
genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
-#else //!_TARGET_AMD64_
- NYI("RyuJIT: Emit Profiler Leave callback");
-#endif // _TARGET_*
-
-#else // LEGACY_BACKEND
+#elif defined(_TARGET_X86_)
-#if defined(_TARGET_X86_)
//
// Push the profilerHandle
//
@@ -7538,7 +7645,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
}
genSinglePush();
- genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ genEmitHelperCall(helper,
sizeof(int) * 1, // argSize
EA_UNKNOWN); // retSize
@@ -7549,7 +7656,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
{
compiler->fgPtrArgCntMax = 1;
}
-#elif defined(_TARGET_ARM_)
+
+#elif defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+
//
// Push the profilerHandle
//
@@ -7571,9 +7680,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
bool r0Trashed;
emitAttr attr = EA_UNKNOWN;
- if (compiler->info.compRetType == TYP_VOID ||
- (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && (varTypeIsFloating(compiler->info.compRetType) ||
- compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
+ if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
+ (varTypeIsFloating(compiler->info.compRetType) ||
+ compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
{
r0Trashed = false;
}
@@ -7625,11 +7734,10 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
}
regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
-#else // _TARGET_*
- NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking them");
-#endif // _TARGET_*
-#endif // LEGACY_BACKEND
+#else // target
+ NYI("Emit Profiler Leave callback");
+#endif // target
/* Restore the stack level */
genStackLevel = saveStackLvl2;
@@ -7741,7 +7849,7 @@ void CodeGen::genPrologPadForReJit()
assert(compiler->compGeneratingProlog);
#ifdef _TARGET_XARCH_
- if (!(compiler->opts.eeFlags & CORJIT_FLG_PROF_REJIT_NOPS))
+ if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS))
{
return;
}
@@ -8165,11 +8273,9 @@ void CodeGen::genFnProlog()
getEmitter()->emitBegProlog();
compiler->unwindBegProlog();
-#ifdef DEBUGGING_SUPPORT
// Do this so we can put the prolog instruction group ahead of
// other instruction groups
genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
-#endif // DEBUGGING_SUPPORT
#ifdef DEBUG
if (compiler->opts.dspCode)
@@ -8178,13 +8284,11 @@ void CodeGen::genFnProlog()
}
#endif
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
{
// Create new scopes for the method-parameters for the prolog-block.
psiBegProlog();
}
-#endif
#ifdef DEBUG
@@ -8664,12 +8768,6 @@ void CodeGen::genFnProlog()
// when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
{
- /*
- // size/speed option?
- getEmitter()->emitIns_I_ARR(INS_mov, EA_PTRSIZE, 0,
- REG_EBP, REG_NA, -compiler->lvaShadowSPfirstOffs);
- */
-
// The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*));
@@ -8707,9 +8805,8 @@ void CodeGen::genFnProlog()
// Initialize any "hidden" slots/locals
- if (compiler->compLocallocUsed)
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
{
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
#ifdef _TARGET_ARM64_
getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
#else
@@ -8870,12 +8967,10 @@ void CodeGen::genFnProlog()
getEmitter()->emitMarkPrologEnd();
}
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
{
psiEndProlog();
}
-#endif
if (hasGCRef)
{
@@ -8927,7 +9022,7 @@ void CodeGen::genFnProlog()
// LEA EAX, &<VARARGS HANDLE> + EAX
getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
- if (varDsc->lvRegister)
+ if (varDsc->lvIsInReg())
{
if (varDsc->lvRegNum != REG_EAX)
{
@@ -9637,7 +9732,7 @@ void CodeGen::genFnEpilog(BasicBlock* block)
* |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
* | | // in function and funclet
* |-----------------------|
- * | PSP slot |
+ * | PSP slot | // Omitted in CoreRT ABI
* |-----------------------|
* ~ possible 4 byte pad ~
* ~ for alignment ~
@@ -9936,7 +10031,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
* ~ possible 8 byte pad ~
* ~ for alignment ~
* |-----------------------|
- * | PSP slot |
+ * | PSP slot | // Omitted in CoreRT ABI
* |-----------------------|
* | Outgoing arg space | // this only exists if the function makes a call
* |-----------------------| <---- Initial SP
@@ -10007,6 +10102,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is the end of the OS-reported prolog for purposes of unwinding
compiler->unwindEndProlog();
+ // If there is no PSPSym (CoreRT ABI), we are done.
+ if (compiler->lvaPSPSym == BAD_VAR_NUM)
+ {
+ return;
+ }
+
getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
regTracker.rsTrackRegTrash(REG_FPBASE);
@@ -10100,10 +10201,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
+ unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;
+
totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
+ calleeFPRegsSavedSize // pushed callee-saved float regs
// below calculated 'pad' will go here
- + REGSIZE_BYTES // PSPSym
+ + PSPSymSize // PSPSym
+ compiler->lvaOutgoingArgSpaceSize // outgoing arg space
;
@@ -10111,7 +10214,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
+ calleeFPRegsSavedSize // Callee saved xmm regs
- + pad + REGSIZE_BYTES // PSPSym
+ + pad + PSPSymSize // PSPSym
+ compiler->lvaOutgoingArgSpaceSize // outgoing arg space
;
@@ -10124,12 +10227,14 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
}
-#endif // DEBUG
- assert(compiler->lvaPSPSym != BAD_VAR_NUM);
- assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
- compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
- // funclet!
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
+ compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+ }
+#endif // DEBUG
}
#elif defined(_TARGET_ARM64_)
@@ -10249,13 +10354,12 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
{
assert(compiler->compGeneratingProlog);
- if (!compiler->ehNeedsPSPSym())
+ if (compiler->lvaPSPSym == BAD_VAR_NUM)
{
return;
}
- noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
- assert(compiler->lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+ noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
#if defined(_TARGET_ARM_)
@@ -10851,8 +10955,162 @@ unsigned CodeGen::getFirstArgWithStackSlot()
#endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_)
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
+//------------------------------------------------------------------------
+// genSinglePush: Report a change in stack level caused by a single word-sized push instruction
+//
+void CodeGen::genSinglePush()
+{
+ genStackLevel += sizeof(void*);
+}
+
+//------------------------------------------------------------------------
+// genSinglePop: Report a change in stack level caused by a single word-sized pop instruction
+//
+void CodeGen::genSinglePop()
+{
+ genStackLevel -= sizeof(void*);
+}
+
+//------------------------------------------------------------------------
+// genPushRegs: Push the given registers.
+//
+// Arguments:
+//    regs       - mask of registers to push
+// byrefRegs - OUT arg. Set to byref registers that were pushed.
+// noRefRegs - OUT arg. Set to non-GC ref registers that were pushed.
+//
+// Return Value:
+// Mask of registers pushed.
+//
+// Notes:
+// This function does not check if the register is marked as used, etc.
+//
+regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
+{
+ *byrefRegs = RBM_NONE;
+ *noRefRegs = RBM_NONE;
+
+ if (regs == RBM_NONE)
+ {
+ return RBM_NONE;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPushRegs with real regs!");
+ return RBM_NONE;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
+
+ regMaskTP pushedRegs = regs;
+
+ for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & gcInfo.gcRegGCrefSetCur)
+ {
+ type = TYP_REF;
+ }
+ else if (regBit & gcInfo.gcRegByrefSetCur)
+ {
+ *byrefRegs |= regBit;
+ type = TYP_BYREF;
+ }
+ else if (noRefRegs != NULL)
+ {
+ *noRefRegs |= regBit;
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ continue;
+ }
+
+ inst_RV(INS_push, reg, type);
+
+ genSinglePush();
+ gcInfo.gcMarkRegSetNpt(regBit);
+
+ regs &= ~regBit;
+ }
+
+ return pushedRegs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+//------------------------------------------------------------------------
+// genPopRegs: Pop the registers that were pushed by genPushRegs().
+//
+// Arguments:
+// regs - mask of registers to pop
+// byrefRegs - The byref registers that were pushed by genPushRegs().
+// noRefRegs - The non-GC ref registers that were pushed by genPushRegs().
+//
+// Return Value:
+// None
+//
+void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
+{
+ if (regs == RBM_NONE)
+ {
+ return;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPopRegs with real regs!");
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert((regs & byrefRegs) == byrefRegs);
+ noway_assert((regs & noRefRegs) == noRefRegs);
+ noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
+
+ // Walk the registers in the reverse order as genPushRegs()
+ for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & byrefRegs)
+ {
+ type = TYP_BYREF;
+ }
+ else if (regBit & noRefRegs)
+ {
+ type = TYP_INT;
+ }
+ else
+ {
+ type = TYP_REF;
+ }
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+
+ if (type != TYP_INT)
+ gcInfo.gcMarkRegPtrVal(reg, type);
+
+ regs &= ~regBit;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
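
A typical pairing of the two helpers, sketched with a hypothetical regsToSave mask supplied by the caller:

    regMaskTP byrefRegs;
    regMaskTP noRefRegs;
    regMaskTP pushedRegs = genPushRegs(regsToSave, &byrefRegs, &noRefRegs);
    // ... emit code (e.g. a helper call) that may trash the saved registers ...
    genPopRegs(pushedRegs, byrefRegs, noRefRegs); // restores the GC-ness recorded at push time
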
/*****************************************************************************
* genSetScopeInfo
@@ -11151,6 +11409,103 @@ void CodeGen::genSetScopeInfo()
compiler->eeSetLVdone();
}
+//------------------------------------------------------------------------
+// genSetScopeInfo: Record scope information for debug info
+//
+// Arguments:
+// which
+// startOffs - the starting offset for this scope
+// length - the length of this scope
+// varNum - the lclVar for this scope info
+// LVnum
+// avail
+// varLoc
+//
+// Notes:
+//    Called for every scope info piece to be recorded by the main genSetScopeInfo()
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ // We need to do some mapping while reporting back these variables.
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+#ifdef _TARGET_X86_
+ // Non-x86 platforms are allowed to access all arguments directly
+ // so we don't need this code.
+
+ // Is this a varargs function?
+
+ if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
+ varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
+ {
+ noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
+
+ // All stack arguments (except the varargs handle) have to be
+ // accessed via the varargs cookie. Discard generated info,
+ // and just find its position relative to the varargs handle
+
+ PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
+ if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
+ {
+ noway_assert(!compiler->opts.compDbgCode);
+ return;
+ }
+
+ // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
+ // arguments of vararg functions to avoid reporting them to GC.
+ noway_assert(!compiler->lvaTable[varNum].lvRegister);
+ unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
+ unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ noway_assert(cookieOffset < varOffset);
+ unsigned offset = varOffset - cookieOffset;
+ unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+ noway_assert(offset < stkArgSize);
+ offset = stkArgSize - offset;
+
+ varLoc.vlType = Compiler::VLT_FIXED_VA;
+ varLoc.vlFixedVarArg.vlfvOffset = offset;
+ }
+
+#endif // _TARGET_X86_
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
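
For the x86 varargs remapping above, the reported offset is the argument's distance from the end of the incoming stack argument area, measured from the varargs cookie. With hypothetical frame offsets:

    unsigned cookieOffset = 4;                        // hypothetical offset of the varargs handle
    unsigned varOffset    = 8;                        // hypothetical offset of the argument
    unsigned stkArgSize   = 16;                       // hypothetical incoming stack arg area size
    unsigned offset       = varOffset - cookieOffset; // 4
    offset                = stkArgSize - offset;      // 12, reported as vlFixedVarArg.vlfvOffset
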
+
/*****************************************************************************/
#ifdef LATE_DISASM
#if defined(DEBUG)
@@ -11747,19 +12102,16 @@ void CodeGen::genIPmappingGen()
compiler->eeSetLIdone();
}
-#endif // DEBUGGING_SUPPORT
-
/*============================================================================
*
* These are empty stubs to help the late dis-assembler to compile
- * if DEBUGGING_SUPPORT is not enabled, or the late disassembler is being
- * built into a non-DEBUG build.
+ * if the late disassembler is being built into a non-DEBUG build.
*
*============================================================================
*/
#if defined(LATE_DISASM)
-#if !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+#if !defined(DEBUG)
/* virtual */
const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
@@ -11774,6 +12126,6 @@ const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsi
}
/*****************************************************************************/
-#endif // !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+#endif // !defined(DEBUG)
#endif // defined(LATE_DISASM)
/*****************************************************************************/
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index e9abbe6b3c..3950673e3a 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -253,12 +253,14 @@ public:
private:
bool m_cgDoubleAlign;
-#else // !DOUBLE_ALIGN
+#else // !DOUBLE_ALIGN
+
public:
bool doubleAlignOrFramePointerUsed() const
{
return isFramePointerUsed();
}
+
#endif // !DOUBLE_ALIGN
#ifdef DEBUG
@@ -424,10 +426,8 @@ public:
private:
bool m_cgFullPtrRegMap;
-#ifdef DEBUGGING_SUPPORT
public:
virtual void siUpdate() = 0;
-#endif // DEBUGGING_SUPPORT
#ifdef LATE_DISASM
public:
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index ea40eb2aff..667b9d4af8 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -243,18 +243,6 @@ GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
return NULL;
}
-// inline
-void CodeGen::genSinglePush()
-{
- genStackLevel += sizeof(void*);
-}
-
-// inline
-void CodeGen::genSinglePop()
-{
- genStackLevel -= sizeof(void*);
-}
-
#if FEATURE_STACK_FP_X87
// inline
void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
@@ -497,9 +485,10 @@ void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_type
}
}
#endif
-
- insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
- inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
+ {
+ insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
+ }
#ifdef _TARGET_XARCH_
UPDATE_LIVENESS:
@@ -4328,8 +4317,6 @@ emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
}
- // #if defined(DEBUGGING_SUPPORT)
-
/* Special case: comparison of two constants */
// Needed if Importer doesn't call gtFoldExpr()
@@ -4347,8 +4334,6 @@ emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
addrReg1 = genRegMask(op1->gtRegNum);
}
- // #endif
-
/* Compare the operand against the constant */
if (op2->IsIconHandle())
@@ -7087,84 +7072,87 @@ void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg,
regTracker.rsTrackRegTrash(reg);
- bool op2Released = false;
+ {
+ bool op2Released = false;
- // For overflow instructions, tree->gtType is the accurate type,
- // and gives us the size for the operands.
+ // For overflow instructions, tree->gtType is the accurate type,
+ // and gives us the size for the operands.
- emitAttr opSize = emitTypeSize(treeType);
+ emitAttr opSize = emitTypeSize(treeType);
- /* Compute the new value */
+ /* Compute the new value */
- if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
+ if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
#if !CPU_HAS_FP_SUPPORT
- && (treeType == TYP_INT || treeType == TYP_I_IMPL)
+ && (treeType == TYP_INT || treeType == TYP_I_IMPL)
#endif
- )
- {
- ssize_t ival = op2->gtIntCon.gtIconVal;
-
- if (oper == GT_ADD)
- {
- genIncRegBy(reg, ival, tree, treeType, ovfl);
- }
- else if (oper == GT_SUB)
- {
- if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
- (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
- // Therefore we can't use -ival.
)
- {
- /* For unsigned overflow, we have to use INS_sub to set
- the flags correctly */
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
- genDecRegBy(reg, ival, tree);
+ if (oper == GT_ADD)
+ {
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
}
- else
+ else if (oper == GT_SUB)
{
- /* Else, we simply add the negative of the value */
+ if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
+ // Therefore we can't use -ival.
+ )
+ {
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
- genIncRegBy(reg, -ival, tree, treeType, ovfl);
+ genDecRegBy(reg, ival, tree);
+ }
+ else
+ {
+ /* Else, we simply add the negative of the value */
+
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+ }
+ }
+ else if (oper == GT_MUL)
+ {
+ genMulRegBy(reg, ival, tree, treeType, ovfl);
}
}
- else if (oper == GT_MUL)
- {
- genMulRegBy(reg, ival, tree, treeType, ovfl);
- }
- }
- else
- {
- // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
- op2 = op2->gtEffectiveVal();
- if (varTypeIsByte(treeType) && op2->InReg())
+ else
{
- noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
+ // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
+ op2 = op2->gtEffectiveVal();
+ if (varTypeIsByte(treeType) && op2->InReg())
+ {
+ noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
- regNumber op2reg = op2->gtRegNum;
- regMaskTP op2regMask = genRegMask(op2reg);
+ regNumber op2reg = op2->gtRegNum;
+ regMaskTP op2regMask = genRegMask(op2reg);
- if (!(op2regMask & RBM_BYTE_REGS))
- {
- regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ if (!(op2regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
- inst_RV_RV(INS_mov, byteReg, op2reg);
- regTracker.rsTrackRegTrash(byteReg);
+ inst_RV_RV(INS_mov, byteReg, op2reg);
+ regTracker.rsTrackRegTrash(byteReg);
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
- op2Released = true;
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ op2Released = true;
- op2->gtRegNum = byteReg;
+ op2->gtRegNum = byteReg;
+ }
}
- }
- inst_RV_TT(ins, reg, op2, 0, opSize, flags);
- }
+ inst_RV_TT(ins, reg, op2, 0, opSize, flags);
+ }
- /* Free up anything that was tied up by the operand */
-
- if (!op2Released)
- genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ /* Free up anything that was tied up by the operand */
+ if (!op2Released)
+ {
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ }
+ }
/* The result will be where the first operand is sitting */
/* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
@@ -9721,7 +9709,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP
switch (oper)
{
case GT_ASG:
- if (tree->OperIsBlkOp())
+ if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR)
{
genCodeForBlkOp(tree, destReg);
}
@@ -10184,6 +10172,9 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP
if (op1 == NULL)
return;
#endif
+ __fallthrough;
+
+ case GT_INIT_VAL:
/* Generate the operand into some register */
@@ -11293,10 +11284,8 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
bool volat = false; // Is this a volatile store
regMaskTP regGC;
instruction ins;
-#ifdef DEBUGGING_SUPPORT
- unsigned lclVarNum = compiler->lvaCount;
- unsigned lclILoffs = DUMMY_INIT(0);
-#endif
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
#ifdef _TARGET_ARM_
if (tree->gtType == TYP_STRUCT)
@@ -11335,7 +11324,6 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
-#ifdef DEBUGGING_SUPPORT
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
* Remember the local var info to call siCheckVarScope
@@ -11346,7 +11334,6 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
lclVarNum = varNum;
lclILoffs = op1->gtLclVar.gtLclILoffs;
}
-#endif
/* Check against dead store ? (with min opts we may have dead stores) */
@@ -11999,13 +11986,11 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
LExit:
-#ifdef DEBUGGING_SUPPORT
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
*/
if (lclVarNum < compiler->lvaCount)
siCheckVarScope(lclVarNum, lclILoffs);
-#endif
}
#ifdef _PREFAST_
#pragma warning(pop)
@@ -12436,14 +12421,12 @@ void CodeGen::genCodeForBBlist()
regSet.rsSpillBeg();
-#ifdef DEBUGGING_SUPPORT
/* Initialize the line# tracking logic */
if (compiler->opts.compScopeInfo)
{
siInit();
}
-#endif
#ifdef _TARGET_X86_
if (compiler->compTailCallUsed)
@@ -12774,27 +12757,7 @@ void CodeGen::genCodeForBBlist()
genResetFPstkLevel();
#endif // FEATURE_STACK_FP_X87
-#if !FEATURE_FIXED_OUT_ARGS
- /* Check for inserted throw blocks and adjust genStackLevel */
-
- if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
- {
- noway_assert(block->bbFlags & BBF_JMP_TARGET);
-
- genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
-
- if (genStackLevel)
- {
-#ifdef _TARGET_X86_
- getEmitter()->emitMarkStackLvl(genStackLevel);
- inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
- genStackLevel = 0;
-#else // _TARGET_X86_
- NYI("Need emitMarkStackLvl()");
-#endif // _TARGET_X86_
- }
- }
-#endif // !FEATURE_FIXED_OUT_ARGS
+ genAdjustStackLevel(block);
savedStkLvl = genStackLevel;
@@ -12802,7 +12765,6 @@ void CodeGen::genCodeForBBlist()
compiler->compCurBB = block;
-#ifdef DEBUGGING_SUPPORT
siBeginBlock(block);
// BBF_INTERNAL blocks don't correspond to any single IL instruction.
@@ -12810,7 +12772,6 @@ void CodeGen::genCodeForBBlist()
genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
/*---------------------------------------------------------------------
*
@@ -12830,8 +12791,6 @@ void CodeGen::genCodeForBBlist()
{
noway_assert(stmt->gtOper == GT_STMT);
-#if defined(DEBUGGING_SUPPORT)
-
/* Do we have a new IL-offset ? */
if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
@@ -12841,8 +12800,6 @@ void CodeGen::genCodeForBBlist()
firstMapping = false;
}
-#endif // DEBUGGING_SUPPORT
-
#ifdef DEBUG
if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
{
@@ -12945,7 +12902,7 @@ void CodeGen::genCodeForBBlist()
// harmless "inc" instruction (does not interfere with the exception
// object).
- if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
+ if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) &&
(block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
{
nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
@@ -12972,14 +12929,10 @@ void CodeGen::genCodeForBBlist()
noway_assert(stmt->gtOper == GT_STMT);
-#ifdef DEBUGGING_SUPPORT
genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
-#endif
} //-------- END-FOR each statement-tree of the current block ---------
-#ifdef DEBUGGING_SUPPORT
-
if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
{
siEndBlock(block);
@@ -13005,8 +12958,6 @@ void CodeGen::genCodeForBBlist()
}
}
-#endif // DEBUGGING_SUPPORT
-
genStackLevel -= savedStkLvl;
gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
@@ -13449,10 +13400,8 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
{
case GT_ASG:
{
-#ifdef DEBUGGING_SUPPORT
unsigned lclVarNum = compiler->lvaCount;
unsigned lclVarILoffs = DUMMY_INIT(0);
-#endif
/* Is the target a local ? */
@@ -13467,7 +13416,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
// No dead stores, (with min opts we may have dead stores)
noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
-#ifdef DEBUGGING_SUPPORT
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
* Remember the local var info to call siCheckVarScope
@@ -13479,7 +13427,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
lclVarNum = varNum;
lclVarILoffs = op1->gtLclVar.gtLclILoffs;
}
-#endif
/* Has the variable been assigned to a register (pair) ? */
@@ -13767,13 +13714,11 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
genUpdateLife(op1);
genUpdateLife(tree);
-#ifdef DEBUGGING_SUPPORT
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
*/
if (lclVarNum < compiler->lvaCount)
siCheckVarScope(lclVarNum, lclVarILoffs);
-#endif
}
return;
@@ -15792,132 +15737,6 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
/*****************************************************************************
*
- * Push the given registers.
- * This function does not check if the register is marked as used, etc.
- */
-
-regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
-{
- *byrefRegs = RBM_NONE;
- *noRefRegs = RBM_NONE;
-
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
-
- if (regs == RBM_NONE)
- return RBM_NONE;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPushRegs with real regs!");
- return RBM_NONE;
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
-
- regMaskTP pushedRegs = regs;
-
- for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & gcInfo.gcRegGCrefSetCur)
- {
- type = TYP_REF;
- }
- else if (regBit & gcInfo.gcRegByrefSetCur)
- {
- *byrefRegs |= regBit;
- type = TYP_BYREF;
- }
- else if (noRefRegs != NULL)
- {
- *noRefRegs |= regBit;
- type = TYP_I_IMPL;
- }
- else
- {
- continue;
- }
-
- inst_RV(INS_push, reg, type);
-
- genSinglePush();
- gcInfo.gcMarkRegSetNpt(regBit);
-
- regs &= ~regBit;
- }
-
- return pushedRegs;
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
-
-/*****************************************************************************
- *
- * Pop the registers pushed by genPushRegs()
- */
-
-void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
-{
- if (regs == RBM_NONE)
- return;
-
-#if FEATURE_FIXED_OUT_ARGS
-
- NYI("Don't call genPopRegs with real regs!");
-
-#else // FEATURE_FIXED_OUT_ARGS
-
- noway_assert((regs & byrefRegs) == byrefRegs);
- noway_assert((regs & noRefRegs) == noRefRegs);
- // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
- noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
-
- noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
- noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
-
- // Walk the registers in the reverse order as genPushRegs()
- for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
- {
- regMaskTP regBit = regMaskTP(1) << reg;
-
- if ((regBit & regs) == RBM_NONE)
- continue;
-
- var_types type;
- if (regBit & byrefRegs)
- {
- type = TYP_BYREF;
- }
- else if (regBit & noRefRegs)
- {
- type = TYP_INT;
- }
- else
- {
- type = TYP_REF;
- }
-
- inst_RV(INS_pop, reg, type);
- genSinglePop();
-
- if (type != TYP_INT)
- gcInfo.gcMarkRegPtrVal(reg, type);
-
- regs &= ~regBit;
- }
-
-#endif // FEATURE_FIXED_OUT_ARGS
-}
-
-/*****************************************************************************
- *
* Push the given argument list, right to left; returns the total amount of
* stuff pushed.
*/
@@ -18519,12 +18338,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
CORINFO_SIG_INFO* sigInfo = nullptr;
-#ifdef DEBUGGING_SUPPORT
if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
{
(void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
}
-#endif
/* Make some sanity checks on the call node */
@@ -19600,6 +19417,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
regNumber indCallReg;
case IAT_VALUE:
+ {
//------------------------------------------------------
                // Non-virtual direct calls to known addresses
//
@@ -19607,7 +19425,24 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// it be nice if they all did!
CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_ARM_
- if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+            // We may use a direct call for some recursive calls,
+            // as we can safely estimate the distance from the call site to the top of the method
+ const int codeOffset = MAX_PROLOG_SIZE_BYTES + // prolog size
+ getEmitter()->emitCurCodeOffset + // offset of the current IG
+ getEmitter()->emitCurIGsize + // size of the current IG
+ 4; // size of the jump instruction
+ // that we are now emitting
+ if (compiler->gtIsRecursiveCall(call->AsCall()) && codeOffset <= -CALL_DIST_MAX_NEG)
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ else if (!arm_Valid_Imm_For_BL((ssize_t)addr))
{
// Load the address into a register and call through a register
indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
@@ -19634,7 +19469,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
false, /* isJump */
emitter::emitNoGChelper(helperNum));
}
- break;
+ }
+ break;
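
The offset estimate above is conservative; with hypothetical values (and assuming CALL_DIST_MAX_NEG encodes the roughly 16MB backward reach of an ARM BL), the check works out as:

    const int codeOffset = MAX_PROLOG_SIZE_BYTES // upper bound on the prolog size
                           + 0x2000              // hypothetical offset of the current IG
                           + 0x40                // hypothetical size of the current IG
                           + 4;                  // the BL instruction being emitted
    // codeOffset is far below -CALL_DIST_MAX_NEG, so the recursive call can be
    // emitted as a direct BL back to the method entry.
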
case IAT_PVALUE:
//------------------------------------------------------
@@ -20046,7 +19882,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
#if defined(_TARGET_X86_)
if (call->gtFlags & GTF_CALL_UNMANAGED)
{
- if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
+ if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) ||
compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
{
// P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
@@ -20756,9 +20592,11 @@ DONE:
}
#endif
- /* Write the lvaShadowSPfirst stack frame slot */
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ /* Write the lvaLocAllocSPvar stack frame slot */
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ }
#if STACK_PROBES
// Don't think it is worth it the codegen complexity to embed this
@@ -20783,98 +20621,6 @@ DONE:
return regCnt;
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
-#ifdef _TARGET_X86_
- // Non-x86 platforms are allowed to access all arguments directly
- // so we don't need this code.
-
- // Is this a varargs function?
-
- if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
- varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
- {
- noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
-
- // All stack arguments (except the varargs handle) have to be
- // accessed via the varargs cookie. Discard generated info,
- // and just find its position relative to the varargs handle
-
- PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
- if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
- {
- noway_assert(!compiler->opts.compDbgCode);
- return;
- }
-
- // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
- // arguments of vararg functions to avoid reporting them to GC.
- noway_assert(!compiler->lvaTable[varNum].lvRegister);
- unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
- unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
-
- noway_assert(cookieOffset < varOffset);
- unsigned offset = varOffset - cookieOffset;
- unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
- noway_assert(offset < stkArgSize);
- offset = stkArgSize - offset;
-
- varLoc.vlType = Compiler::VLT_FIXED_VA;
- varLoc.vlFixedVarArg.vlfvOffset = offset;
- }
-
-#endif // _TARGET_X86_
-
- VarName name = NULL;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-
-#endif // DEBUGGING_SUPPORT
-
/*****************************************************************************
*
* Return non-zero if the given register is free after the given tree is
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
new file mode 100644
index 0000000000..9713288e08
--- /dev/null
+++ b/src/jit/codegenlinear.cpp
@@ -0,0 +1,1773 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Code Generation Support Methods for Linear Codegen XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+#include "emit.h"
+#include "codegen.h"
+
+//------------------------------------------------------------------------
+// genCodeForBBlist: Generate code for all the blocks in a method
+//
+// Arguments:
+// None
+//
+// Notes:
+// This is the main method for linear codegen. It calls genCodeForTreeNode
+// to generate the code for each node in each BasicBlock, and handles BasicBlock
+// boundaries and branches.
+//
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+    // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+    // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ {
+ continue;
+ }
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ // Figure out which registers hold variables on entry to this block
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+           to the block; it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+ }
+#endif
+
+ block->bbEmitCookie = nullptr;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+ genAdjustStackLevel(block);
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+        // Traverse the block in linear order, generating code for each node as we
+        // encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ // Set the use-order numbers for each node.
+ {
+ int useNum = 0;
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ assert((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0);
+
+ node->gtUseNum = -1;
+ if (node->isContained() || node->IsCopyOrReload())
+ {
+ continue;
+ }
+
+ for (GenTree* operand : node->Operands())
+ {
+ genNumberOperandUse(operand, useNum);
+ }
+ }
+ }
+#endif // DEBUG
+
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+        // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG)
+ if (block->bbNext == nullptr)
+ {
+// Unit testing of the emitter: generate a bunch of instructions into the last block
+// (it's as good as any, but better than the prolog, which can only be a single instruction
+// group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+// thinks the instructions are the same as we do.
+#if defined(_TARGET_AMD64_) && defined(LATE_DISASM)
+ genAmd64EmitterUnitTests();
+#elif defined(_TARGET_ARM64_)
+ genArm64EmitterUnitTests();
+#endif // _TARGET_ARM64_
+ }
+#endif // defined(DEBUG)
+
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == nullptr);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
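+    // For example (a sketch, not actual emitted code), for case #1:
+    //
+    //     call  SomeMethod     ; illustrative call; last instruction of its EH region
+    //     nop                  ; keeps the return address inside that EH region for the stack walker
+    //                          ; (the next block begins a different EH region)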
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+                            // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+                // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+ block = genCallFinally(block, lblk);
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ genEHCatchRet(block);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // !FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genEHFinallyOrFilterRet(block);
+ break;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = nullptr;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register Management XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+//
+
+//------------------------------------------------------------------------
+// genGetAssignedReg: Get the register assigned to the given node
+//
+// Arguments:
+// tree - the lclVar node whose assigned register we want
+//
+// Return Value:
+// The assigned regNumber
+//
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ // In order for a lclVar to have been allocated to a register, it must not have been aliasable, and can
+ // therefore be store-normalized (rather than load-normalized). In fact, not performing store normalization
+ // can lead to problems on architectures where a lclVar may be allocated to a register that is not
+ // addressable at the granularity of the lclVar's defined type (e.g. x86).
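+        // For example (a sketch): on x86 a TYP_BYTE lclVar allocated to ESI cannot be spilled with a
+        // byte-sized store, since ESI has no byte-addressable form; we therefore store it using its
+        // genActualType (TYP_INT) size.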
+ var_types lclTyp = genActualType(varDsc->TypeGet());
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+#endif
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+//------------------------------------------------------------------------
+// genUpdateVarReg: Update the current register location for a lclVar
+//
+// Arguments:
+// varDsc - the LclVarDsc for the lclVar
+// tree - the lclVar node
+//
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// sameRegAsDst: Return the child that has the same reg as the dst (if any)
+//
+// Arguments:
+// tree - the node of interest
+// other - an out parameter to return the other child
+//
+// Notes:
+// If 'tree' has a child with the same assigned register as its target reg,
+// that child will be returned, and 'other' will contain the non-matching child.
+// Otherwise, both other and the return value will be nullptr.
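+//
+//    For example (a sketch): for an ADD node whose target register matches op1's register,
+//    this returns op1 and sets 'other' to op2.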
+//
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// genUnspillRegIfNeeded: Reload the value into a register, if needed
+//
+// Arguments:
+// tree - the node of interest.
+//
+// Notes:
+// In the normal case, the value will be reloaded into the register it
+// was originally computed into. However, if that register is not available,
+// the register allocator will have allocated a different register, and
+// inserted a GT_RELOAD to indicate the register into which it should be
+// reloaded.
+//
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+ GenTree* unspillTree = tree;
+
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+// TODO-Cleanup: The following code could probably be further merged and cleaned up.
+#ifdef _TARGET_XARCH_
+ // Load local variable from its home location.
+ // In most cases the tree type will indicate the correct type to use for the load.
+ // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
+ // widened when loaded into a register), and its size is not the same as genActualType of
+ // the type of the lclVar, then we need to change the type of the tree node when loading.
+ // This situation happens due to "optimizations" that avoid a cast and
+ // simply retype the node when using long type lclVar as an int.
+ // While loading the int in that case would work for this use of the lclVar, if it is
+ // later used as a long, we will have incorrectly truncated the long.
+ // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
+ // extending load.
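+            //
+            // For example (a sketch, node/lclVar numbers illustrative): a TYP_LONG lclVar V03 may
+            // appear here retyped as an int:
+            //     t12 = LCL_VAR int V03   // V03's actual type is long
+            // Loading only 4 bytes would lose the upper half if V03 is later read as a long, so we
+            // temporarily retype the node and load using the lclVar's actual (long) type.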
+
+ var_types treeType = unspillTree->TypeGet();
+ if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
+ {
+ assert(!varTypeIsGC(varDsc));
+ var_types spillType = genActualType(varDsc->lvType);
+ unspillTree->gtType = spillType;
+ inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ unspillTree->gtType = treeType;
+ }
+ else
+ {
+ inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ }
+#elif defined(_TARGET_ARM64_)
+ var_types targetType = unspillTree->gtType;
+ instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
+ emitAttr attr = emitTypeSize(targetType);
+ emitter* emit = getEmitter();
+
+ // Fixes Issue #3326
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
+#else
+ NYI("Unspilling not implemented for this target architecture.");
+#endif
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+ // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
+ // one or more of its result regs are spilled. Call node needs to be
+ // queried to know which specific result regs to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = retTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genCopyRegIfNeeded: Copy the given node into the specified register
+//
+// Arguments:
+// node - The node that has been evaluated (consumed).
+// needReg - The register in which its value is needed.
+//
+// Notes:
+// This must be a node that has a register.
+//
+void CodeGen::genCopyRegIfNeeded(GenTree* node, regNumber needReg)
+{
+ assert((node->gtRegNum != REG_NA) && (needReg != REG_NA));
+ if (node->gtRegNum != needReg)
+ {
+ inst_RV_RV(INS_mov, needReg, node->gtRegNum, node->TypeGet());
+ }
+}
+
+// Do liveness update for a subnode that is being consumed by codegen,
+// including the logic for reloading it if needed, and also taking care
+// of locating the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* node, regNumber needReg)
+{
+ if (needReg == REG_NA)
+ {
+ return;
+ }
+ regNumber treeReg = genConsumeReg(node);
+ genCopyRegIfNeeded(node, needReg);
+}
+
+// Check that registers are consumed in the right order for the current node being generated.
+#ifdef DEBUG
+void CodeGen::genNumberOperandUse(GenTree* const operand, int& useNum) const
+{
+ assert(operand != nullptr);
+ assert(operand->gtUseNum == -1);
+
+ // Ignore argument placeholders.
+ if (operand->OperGet() == GT_ARGPLACE)
+ {
+ return;
+ }
+
+ if (!operand->isContained() && !operand->IsCopyOrReload())
+ {
+ operand->gtUseNum = useNum;
+ useNum++;
+ }
+ else
+ {
+ for (GenTree* operand : operand->Operands())
+ {
+ genNumberOperandUse(operand, useNum);
+ }
+ }
+}
+
+void CodeGen::genCheckConsumeNode(GenTree* const node)
+{
+ assert(node != nullptr);
+
+ if (verbose)
+ {
+ if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0)
+ {
+ printf("Node was consumed twice:\n");
+ compiler->gtDispTree(node, nullptr, nullptr, true);
+ }
+ else if ((lastConsumedNode != nullptr) && (node->gtUseNum < lastConsumedNode->gtUseNum))
+ {
+ printf("Nodes were consumed out-of-order:\n");
+ compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
+ compiler->gtDispTree(node, nullptr, nullptr, true);
+ }
+ }
+
+ assert((node->OperGet() == GT_CATCH_ARG) || ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0));
+ assert((lastConsumedNode == nullptr) || (node->gtUseNum == -1) || (node->gtUseNum > lastConsumedNode->gtUseNum));
+
+ node->gtDebugFlags |= GTF_DEBUG_NODE_CG_CONSUMED;
+ lastConsumedNode = node;
+}
+#endif // DEBUG
+
+//--------------------------------------------------------------------
+// genConsumeReg: Do liveness update for a subnode that is being
+// consumed by codegen.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Returns the reg number of tree.
+// In case of multi-reg call node returns the first reg number
+// of the multi-reg return.
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
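+    //
+    // For example (a sketch, register names illustrative): if lclVar V05 currently lives in RCX
+    // (lvRegNum) but this use's gtRegNum is RDX, we copy RCX into RDX here before updating life.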
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+
+ assert(tree->gtHasReg());
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
+ }
+
+ genCheckConsumeNode(tree);
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+ else if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ genConsumeOperands(addr);
+}
+
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+#if !defined(_TARGET_64BIT_)
+ if (tree->OperGet() == GT_LONG)
+ {
+ genConsumeRegs(tree->gtGetOp1());
+ genConsumeRegs(tree->gtGetOp2());
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ if (tree->isContained())
+ {
+ if (tree->isContainedSpillTemp())
+ {
+ // spill temps are un-tracked and hence no need to update life
+ }
+ else if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+#ifdef _TARGET_XARCH_
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+            // A contained lcl var must be living on the stack and marked as reg-optional, or must
+            // not be a register candidate.
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ noway_assert(varDsc->lvRegNum == REG_STK);
+ noway_assert(tree->IsRegOptional() || !varDsc->lvLRACandidate);
+
+ // Update the life of the lcl var.
+ genUpdateLife(tree);
+ }
+#endif // _TARGET_XARCH_
+ else if (tree->OperIsInitVal())
+ {
+ genConsumeReg(tree->gtGetOp1());
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+            // An (in)equality operation that produces a bool result, when compared
+            // against Vector zero, marks its Vector zero operand as contained.
+ assert(tree->OperIsLeaf() || tree->IsIntegralConstVector(0));
+#else
+ assert(tree->OperIsLeaf());
+#endif
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
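+//
+//    For example (a sketch): if both operands use the same lclVar V02 and the use that executes
+//    second is flagged as a last use, consuming the operands out of execution order would kill
+//    V02's register too early. With GTF_REVERSE_OPS set, gtOp2 executes first and so must be
+//    consumed first.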
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+#if FEATURE_PUT_STRUCT_ARG_STK
+//------------------------------------------------------------------------
+// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
+// Also loads in the right register the addresses of the
+// src/dst for rep mov operation.
+//
+// Arguments:
+// putArgNode - the PUTARG_STK tree.
+// dstReg - the dstReg for the rep move operation.
+// srcReg - the srcReg for the rep move operation.
+// sizeReg - the sizeReg for the rep move operation.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
+//    for copying a struct with references on the stack.
+// The source address/offset is determined from the address on the GT_OBJ node, while
+// the destination address is the address contained in 'm_stkArgVarNum' plus the offset
+// provided in the 'putArgNode'.
+// m_stkArgVarNum must be set to the varnum for the local used for placing the "by-value" args on the stack.
+
+void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode,
+ regNumber dstReg,
+ regNumber srcReg,
+ regNumber sizeReg)
+{
+ assert(varTypeIsStruct(putArgNode));
+
+ // The putArgNode children are always contained. We should not consume any registers.
+ assert(putArgNode->gtGetOp1()->isContained());
+
+ GenTree* dstAddr = putArgNode;
+
+ // Get the source address.
+ GenTree* src = putArgNode->gtGetOp1();
+ assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
+ GenTree* srcAddr = src->gtGetOp1();
+
+ size_t size = putArgNode->getArgSize();
+
+ assert(dstReg != REG_NA);
+ assert(srcReg != REG_NA);
+
+ // Consume the registers only if they are not contained or set to REG_NA.
+ if (srcAddr->gtRegNum != REG_NA)
+ {
+ genConsumeReg(srcAddr);
+ }
+
+ // If the op1 is already in the dstReg - nothing to do.
+ // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ assert(dstReg != REG_SPBASE);
+ inst_RV_RV(INS_mov, dstReg, REG_SPBASE);
+#else // !_TARGET_X86_
+ if (dstAddr->gtRegNum != dstReg)
+ {
+        // Generate an LEA instruction to load into dstReg (RDI for the rep movs) the address of the
+        // outgoing arg space local plus the arg's slot offset (or the incoming arg area for tail calls).
+ // Destination is always local (on the stack) - use EA_PTRSIZE.
+ assert(m_stkArgVarNum != BAD_VAR_NUM);
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, m_stkArgVarNum, putArgNode->getArgOffset());
+ }
+#endif // !_TARGET_X86_
+
+ if (srcAddr->gtRegNum != srcReg)
+ {
+ if (srcAddr->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(srcAddr->isContained());
+ GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
+
+            // Generate an LEA instruction to load the LclVar address into srcReg (RSI for the rep movs).
+ // Source is known to be on the stack. Use EA_PTRSIZE.
+ unsigned int offset = 0;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ offset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
+ }
+ else
+ {
+ assert(srcAddr->gtRegNum != REG_NA);
+ // Source is not known to be on the stack. Use EA_BYREF.
+ getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
+ }
+ }
+
+ if (sizeReg != REG_NA)
+ {
+ inst_RV_IV(INS_mov, sizeReg, size, EA_PTRSIZE);
+ }
+}
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+//------------------------------------------------------------------------
+// genSetBlockSize: Ensure that the block size is in the given register
+//
+// Arguments:
+// blkNode - The block node
+// sizeReg - The register into which the block's size should go
+//
+
+void CodeGen::genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
+{
+ if (sizeReg != REG_NA)
+ {
+ unsigned blockSize = blkNode->Size();
+ if (blockSize != 0)
+ {
+ assert((blkNode->gtRsvdRegs & genRegMask(sizeReg)) != 0);
+ genSetRegToIcon(sizeReg, blockSize);
+ }
+ else
+ {
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ if (sizeNode->gtRegNum != sizeReg)
+ {
+ inst_RV_RV(INS_mov, sizeReg, sizeNode->gtRegNum, sizeNode->TypeGet());
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockSrc: Consume the source address register of a block node, if any.
+//
+// Arguments:
+// blkNode - The block node
+
+void CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ if (blkNode->OperIsCopyBlkOp())
+ {
+ // For a CopyBlk we need the address of the source.
+ if (src->OperGet() == GT_IND)
+ {
+ src = src->gtOp.gtOp1;
+ }
+ else
+ {
+ // This must be a local.
+ // For this case, there is no source address register, as it is a
+ // stack-based address.
+ assert(src->OperIsLocal());
+ return;
+ }
+ }
+ else
+ {
+ if (src->OperIsInitVal())
+ {
+ src = src->gtGetOp1();
+ }
+ }
+ genConsumeReg(src);
+}
+
+//------------------------------------------------------------------------
+// genSetBlockSrc: Ensure that the block source is in its allocated register.
+//
+// Arguments:
+// blkNode - The block node
+// srcReg - The register in which to set the source (address or init val).
+//
+void CodeGen::genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg)
+{
+ GenTree* src = blkNode->Data();
+ if (blkNode->OperIsCopyBlkOp())
+ {
+ // For a CopyBlk we need the address of the source.
+ if (src->OperGet() == GT_IND)
+ {
+ src = src->gtOp.gtOp1;
+ }
+ else
+ {
+ // This must be a local struct.
+ // Load its address into srcReg.
+ inst_RV_TT(INS_lea, srcReg, src, 0, EA_BYREF);
+ return;
+ }
+ }
+ else
+ {
+ if (src->OperIsInitVal())
+ {
+ src = src->gtGetOp1();
+ }
+ }
+ genCopyRegIfNeeded(src, srcReg);
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockOp: Ensure that the block's operands are enregistered
+// as needed.
+// Arguments:
+// blkNode - The block node
+//
+// Notes:
+// This ensures that the operands are consumed in the proper order to
+// obey liveness modeling.
+
+void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // We have to consume the registers, and perform any copies, in the actual execution order.
+    // The nominal order is: dst, src, size. However, this may have been changed by reverse flags
+    // on the blkNode, and by the setting of gtEvalSizeFirst in the case of a dynamic block size.
+ // Note that the register allocator ensures that the registers ON THE NODES will not interfere
+ // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
+ // Further, it ensures that they will not interfere with one another if they are then copied
+ // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
+ // then, that we first consume all the operands, then do any necessary moves.
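+    //
+    // For example (a sketch): for a constant-size copy with no reverse flags, we consume dstAddr,
+    // then the block source, then materialize the size in sizeReg and copy dstAddr/source into
+    // dstReg/srcReg. With IsReverseOp(), the source is consumed (and placed) before dstAddr; for a
+    // dynamic size with gtEvalSizeFirst clear, the size is consumed and set last.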
+
+ GenTree* dstAddr = blkNode->Addr();
+ GenTree* src = nullptr;
+ unsigned blockSize = blkNode->Size();
+ GenTree* size = nullptr;
+ bool evalSizeFirst = true;
+
+ // First, consume all the sources in order
+ if (blkNode->OperGet() == GT_STORE_DYN_BLK)
+ {
+ size = blkNode->AsDynBlk()->gtDynamicSize;
+ if (blkNode->AsDynBlk()->gtEvalSizeFirst)
+ {
+ genConsumeReg(size);
+ }
+ else
+ {
+ evalSizeFirst = false;
+ }
+ }
+ if (blkNode->IsReverseOp())
+ {
+ genConsumeBlockSrc(blkNode);
+ genConsumeReg(dstAddr);
+ }
+ else
+ {
+ genConsumeReg(dstAddr);
+ genConsumeBlockSrc(blkNode);
+ }
+ if (!evalSizeFirst)
+ {
+ noway_assert(size != nullptr);
+ genConsumeReg(size);
+ }
+
+ // Next, perform any necessary moves.
+ if (evalSizeFirst)
+ {
+ genSetBlockSize(blkNode, sizeReg);
+ }
+ if (blkNode->IsReverseOp())
+ {
+ genSetBlockSrc(blkNode, srcReg);
+ genCopyRegIfNeeded(dstAddr, dstReg);
+ }
+ else
+ {
+ genCopyRegIfNeeded(dstAddr, dstReg);
+ genSetBlockSrc(blkNode, srcReg);
+ }
+ if (!evalSizeFirst)
+ {
+ genSetBlockSize(blkNode, sizeReg);
+ }
+}
+
+//-------------------------------------------------------------------------
+// genProduceReg: do liveness update for register produced by the current
+// node in codegen.
+//
+// Arguments:
+// tree - Gentree node
+//
+// Return Value:
+// None.
+void CodeGen::genProduceReg(GenTree* tree)
+{
+#ifdef DEBUG
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_CG_PRODUCED) == 0);
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_CG_PRODUCED;
+#endif
+
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ // Code for GT_COPY node gets generated as part of consuming regs by its parent.
+ // A GT_COPY node in turn produces reg result and it should never be marked to
+ // spill.
+ //
+ // Similarly GT_RELOAD node gets generated as part of consuming regs by its
+ // parent and should never be marked for spilling.
+ noway_assert(!tree->IsCopyOrReload());
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ // Ensure that lclVar stores are typed correctly.
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
+ (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
+ }
+ else
+ {
+ // In case of multi-reg call node, spill flag on call node
+ // indicates that one or more of its allocated regs need to
+ // be spilled. Call node needs to be further queried to
+ // know which of its result regs needs to be spilled.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILL) != 0)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ call->SetInReg();
+ regSet.rsSpillTree(reg, call, i);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+        //    we must be loading it into a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ // Multi-reg call node will produce more than one register result.
+ // Mark all the regs produced by call node.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+ // we should never see reload of multi-reg call here
+ // because GT_RELOAD gets generated in reg consuming path.
+ noway_assert(tree->OperGet() == GT_COPY);
+
+ // A multi-reg GT_COPY node produces those regs to which
+ // copy has taken place.
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
+ GenTreeCall* call = copy->gtGetOp1()->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copy->GetRegNumByIdx(i);
+
+ if (toReg != REG_NA)
+ {
+ gcInfo.gcMarkRegPtrVal(toReg, type);
+ }
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// Generates an IP-relative call or an indirect call via a register ('call reg').
+// Pass in 'addr' for a relative call, or 'base' for an indirect register call.
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes; should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// Generates an indirect call via an addressing mode ('call []') given an indir node.
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes; should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index fb0d6ea165..406ab779f1 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -16,6 +16,10 @@ void genCodeForTreeNode(GenTreePtr treeNode);
void genCodeForBinary(GenTreePtr treeNode);
+#if defined(_TARGET_X86_)
+void genCodeForLongUMod(GenTreeOp* node);
+#endif // _TARGET_X86_
+
void genCodeForDivMod(GenTreeOp* treeNode);
void genCodeForMulHi(GenTreeOp* treeNode);
@@ -24,6 +28,10 @@ void genLeaInstruction(GenTreeAddrMode* lea);
void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
+#if !defined(_TARGET_64BIT_)
+void genLongToIntCast(GenTreePtr treeNode);
+#endif
+
void genIntToIntCast(GenTreePtr treeNode);
void genFloatToFloatCast(GenTreePtr treeNode);
@@ -36,7 +44,7 @@ void genCkfinite(GenTreePtr treeNode);
void genIntrinsic(GenTreePtr treeNode);
-void genPutArgStk(GenTreePtr treeNode);
+void genPutArgStk(GenTreePutArgStk* treeNode);
unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
@@ -49,7 +57,6 @@ void genCompareInt(GenTreePtr treeNode);
#if !defined(_TARGET_64BIT_)
void genCompareLong(GenTreePtr treeNode);
-void genJTrueLong(GenTreePtr treeNode);
#endif
#ifdef FEATURE_SIMD
@@ -61,7 +68,8 @@ enum SIMDScalarMoveType
};
instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr);
-void genSIMDScalarMove(var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType);
+void genSIMDScalarMove(
+ var_types targetType, var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType);
void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg);
void genSIMDIntrinsicInit(GenTreeSIMD* simdNode);
void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode);
@@ -87,7 +95,10 @@ void genSIMDCheck(GenTree* treeNode);
void genStoreIndTypeSIMD12(GenTree* treeNode);
void genStoreLclFldTypeSIMD12(GenTree* treeNode);
void genLoadIndTypeSIMD12(GenTree* treeNode);
-void genLoadLclFldTypeSIMD12(GenTree* treeNode);
+void genLoadLclTypeSIMD12(GenTree* treeNode);
+#ifdef _TARGET_X86_
+void genPutArgStkSIMD12(GenTree* treeNode);
+#endif // _TARGET_X86_
#endif // FEATURE_SIMD
#if !defined(_TARGET_64BIT_)
@@ -104,6 +115,7 @@ void genUnspillRegIfNeeded(GenTree* tree);
regNumber genConsumeReg(GenTree* tree);
+void genCopyRegIfNeeded(GenTree* tree, regNumber needReg);
void genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
void genConsumeIfReg(GenTreePtr tree)
@@ -122,15 +134,14 @@ void genConsumeAddress(GenTree* addr);
void genConsumeAddrMode(GenTreeAddrMode* mode);
-void genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
-void genConsumeBlockDst(GenTreeBlk* blkNode);
-GenTree* genConsumeBlockSrc(GenTreeBlk* blkNode);
+void genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
+void genConsumeBlockSrc(GenTreeBlk* blkNode);
+void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg);
void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-void genConsumePutStructArgStk(
- GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+#endif // FEATURE_PUT_STRUCT_ARG_STK
void genConsumeRegs(GenTree* tree);
@@ -142,6 +153,10 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl
void genCodeForShift(GenTreePtr tree);
+#if defined(_TARGET_X86_)
+void genCodeForShiftLong(GenTreePtr tree);
+#endif
+
#ifdef _TARGET_XARCH_
void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
#endif // _TARGET_XARCH_
@@ -154,12 +169,23 @@ void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-void genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum);
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+#ifdef _TARGET_X86_
+bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
+void genPushReg(var_types type, regNumber srcReg);
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk);
+#endif // _TARGET_X86_
+
+void genPutStructArgStk(GenTreePutArgStk* treeNode);
-void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
-void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+int genMove8IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+int genMove4IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+int genMove2IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+int genMove1IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
+void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode);
+void genStoreRegToStackArg(var_types type, regNumber reg, int offset);
+#endif // FEATURE_PUT_STRUCT_ARG_STK
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
@@ -191,6 +217,14 @@ void genCallInstruction(GenTreePtr call);
void genJmpMethod(GenTreePtr jmp);
+BasicBlock* genCallFinally(BasicBlock* block, BasicBlock* lblk);
+
+#if FEATURE_EH_FUNCLETS
+void genEHCatchRet(BasicBlock* block);
+#else // !FEATURE_EH_FUNCLETS
+void genEHFinallyOrFilterRet(BasicBlock* block);
+#endif // !FEATURE_EH_FUNCLETS
+
void genMultiRegCallStoreToLocal(GenTreePtr treeNode);
// Deals with codegen for muti-register struct returns.
@@ -212,9 +246,19 @@ bool genIsRegCandidateLocal(GenTreePtr tree)
return (varDsc->lvIsRegCandidate());
}
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+#ifdef _TARGET_X86_
+bool m_pushStkArg;
+#else // !_TARGET_X86_
+unsigned m_stkArgVarNum;
+unsigned m_stkArgOffset;
+#endif // !_TARGET_X86_
+#endif // !FEATURE_PUT_STRUCT_ARG_STK
+
#ifdef DEBUG
GenTree* lastConsumedNode;
-void genCheckConsumeNode(GenTree* treeNode);
+void genNumberOperandUse(GenTree* const operand, int& useNum) const;
+void genCheckConsumeNode(GenTree* const node);
#else // !DEBUG
inline void genCheckConsumeNode(GenTree* treeNode)
{
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index a41c28695b..8e0af48799 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -24,114 +24,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfo.h"
#include "gcinfoencoder.h"
-// Get the register assigned to the given node
-
-regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
-{
- return tree->gtRegNum;
-}
-
-//------------------------------------------------------------------------
-// genSpillVar: Spill a local variable
-//
-// Arguments:
-// tree - the lclVar node for the variable being spilled
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The lclVar must be a register candidate (lvRegCandidate)
-
-void CodeGen::genSpillVar(GenTreePtr tree)
-{
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- assert(varDsc->lvIsRegCandidate());
-
- // We don't actually need to spill if it is already living in memory
- bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
- if (needsSpill)
- {
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- {
- lclTyp = genActualType(lclTyp);
- }
- emitAttr size = emitTypeSize(lclTyp);
-
- bool restoreRegVar = false;
- if (tree->gtOper == GT_REG_VAR)
- {
- tree->SetOper(GT_LCL_VAR);
- restoreRegVar = true;
- }
-
- // mask off the flag to generate the right spill code, then bring it back
- tree->gtFlags &= ~GTF_REG_VAL;
-
- instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
-#if CPU_LONG_USES_REGPAIR
- if (varTypeIsMultiReg(tree))
- {
- assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
- assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
- regNumber regLo = genRegPairLo(tree->gtRegPair);
- regNumber regHi = genRegPairHi(tree->gtRegPair);
- inst_TT_RV(storeIns, tree, regLo);
- inst_TT_RV(storeIns, tree, regHi, 4);
- }
- else
-#endif
- {
- assert(varDsc->lvRegNum == tree->gtRegNum);
- inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
- }
- tree->gtFlags |= GTF_REG_VAL;
-
- if (restoreRegVar)
- {
- tree->SetOper(GT_REG_VAR);
- }
-
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
- gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
-
- if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
- {
-#ifdef DEBUG
- if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
- }
- else
- {
- JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
- }
-#endif
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
- }
- }
-
- tree->gtFlags &= ~GTF_SPILL;
- varDsc->lvRegNum = REG_STK;
- if (varTypeIsMultiReg(tree))
- {
- varDsc->lvOtherReg = REG_STK;
- }
-}
-
-// inline
-void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
-{
- assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
- varDsc->lvRegNum = tree->gtRegNum;
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
-
/*****************************************************************************
*
* Generate code that will set the given register to the integer constant.
@@ -231,6 +123,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
}
regNumber regGSCheck;
+ regMaskTP regMaskGSCheck = RBM_NONE;
+
if (!pushReg)
{
// Non-tail call: we can use any callee trash register that is not
@@ -251,8 +145,11 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
else
{
#ifdef _TARGET_X86_
- NYI_X86("Tail calls from methods that need GS check");
- regGSCheck = REG_NA;
+ // It doesn't matter which register we pick, since we're going to save and restore it
+ // around the check.
+ // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes?
+ regGSCheck = REG_EAX;
+ regMaskGSCheck = RBM_EAX;
#else // !_TARGET_X86_
// Tail calls from methods that need GS check: We need to preserve registers while
// emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
@@ -287,8 +184,13 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
#endif // !_TARGET_X86_
}
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
if (compiler->gsGlobalSecurityCookieAddr == nullptr)
{
+#if defined(_TARGET_AMD64_)
// If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
// Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
@@ -297,7 +199,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
}
else
+#endif // defined(_TARGET_AMD64_)
{
+ assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
(int)compiler->gsGlobalSecurityCookieVal);
}
@@ -305,6 +209,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
else
{
// Ngen case - GS cookie value needs to be accessed through an indirection.
+
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
@@ -315,821 +222,180 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
inst_JMP(jmpEqual, gsCheckBlk);
genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
genDefineTempLabel(gsCheckBlk);
-}
-/*****************************************************************************
- *
- * Generate code for all the basic blocks in the function.
- */
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
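
For reference, the imm32 path above relies on a sign-extension round trip: a 64-bit cookie value can be encoded as a 32-bit immediate for `cmp mem64, imm32` only if truncating it to 32 bits and sign-extending back reproduces the original value. A minimal standalone sketch of that test (illustrative only, not part of the patch; the helper name is hypothetical):

#include <cstdint>

// Returns true if 'value' can be encoded as a sign-extended 32-bit immediate,
// i.e. 'cmp mem64, imm32' is usable instead of loading the value into a register.
static bool FitsInSignExtendedImm32(int64_t value)
{
    return static_cast<int64_t>(static_cast<int32_t>(value)) == value;
}
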
-void CodeGen::genCodeForBBlist()
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
{
- unsigned varNum;
- LclVarDsc* varDsc;
-
- unsigned savedStkLvl;
-
-#ifdef DEBUG
- genInterruptibleUsed = true;
-
- // You have to be careful if you create basic blocks from now on
- compiler->fgSafeBasicBlockCreation = false;
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnCall)
- {
- compiler->opts.compStackCheckOnCall = false;
- }
-
- // This stress mode is not comptible with fully interruptible GC
- if (genInterruptible && compiler->opts.compStackCheckOnRet)
- {
- compiler->opts.compStackCheckOnRet = false;
- }
-#endif // DEBUG
-
- // Prepare the blocks for exception handling codegen: mark the blocks that needs labels.
- genPrepForEHCodegen();
-
- assert(!compiler->fgFirstBBScratch ||
- compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
-
- /* Initialize the spill tracking logic */
-
- regSet.rsSpillBeg();
-
-#ifdef DEBUGGING_SUPPORT
- /* Initialize the line# tracking logic */
+#if FEATURE_EH_FUNCLETS
+ // Generate a call to the finally, like this:
+ // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
+ // call finally-funclet
+ // jmp finally-return // Only for non-retless finally calls
+ // The jmp can be a NOP if we're going to the next block.
+ // If we're generating code for the main function (not a funclet), and there is no localloc,
+ // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP
+ // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI).
- if (compiler->opts.compScopeInfo)
+ if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
+ (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
{
- siInit();
+ inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
}
-#endif
-
- // The current implementation of switch tables requires the first block to have a label so it
- // can generate offsets to the switch label targets.
- // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
- if (compiler->fgHasSwitch)
+ else
{
- compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
}
+ getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
- genPendingCallLabel = nullptr;
-
- /* Initialize the pointer tracking code */
-
- gcInfo.gcRegPtrSetInit();
- gcInfo.gcVarPtrSetInit();
-
- /* If any arguments live in registers, mark those regs as such */
-
- for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ if (block->bbFlags & BBF_RETLESS_CALL)
{
- /* Is this variable a parameter assigned to a register? */
-
- if (!varDsc->lvIsParam || !varDsc->lvRegister)
- {
- continue;
- }
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
- /* Is the argument live on entry to the method? */
-
- if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
{
- continue;
- }
-
- /* Is this a floating-point argument? */
-
- if (varDsc->IsFloatRegType())
- {
- continue;
+ instGen(INS_BREAKPOINT); // This should never get executed
}
-
- noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
-
- /* Mark the register as holding the variable */
-
- regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
}
-
- unsigned finallyNesting = 0;
-
- // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
- // allocation at the start of each basic block.
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
-
- /*-------------------------------------------------------------------------
- *
- * Walk the basic blocks and generate code for each one
- *
- */
-
- BasicBlock* block;
- BasicBlock* lblk; /* previous block */
-
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ else
{
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\n=============== Generating ");
- block->dspBlockHeader(compiler, true, true);
- compiler->fgDispBBLiveness(block);
- }
-#endif // DEBUG
-
- // Figure out which registers hold variables on entry to this block
-
- regSet.ClearMaskVars();
- gcInfo.gcRegGCrefSetCur = RBM_NONE;
- gcInfo.gcRegByrefSetCur = RBM_NONE;
-
- compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
-
- genUpdateLife(block->bbLiveIn);
-
- // Even if liveness didn't change, we need to update the registers containing GC references.
- // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
- // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
- // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
-
- regMaskTP newLiveRegSet = RBM_NONE;
- regMaskTP newRegGCrefSet = RBM_NONE;
- regMaskTP newRegByrefSet = RBM_NONE;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
- VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
-#endif
- VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
- while (iter.NextElem(compiler, &varIndex))
- {
- unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
- LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
-
- if (varDsc->lvIsInReg())
- {
- newLiveRegSet |= varDsc->lvRegMask();
- if (varDsc->lvType == TYP_REF)
- {
- newRegGCrefSet |= varDsc->lvRegMask();
- }
- else if (varDsc->lvType == TYP_BYREF)
- {
- newRegByrefSet |= varDsc->lvRegMask();
- }
-#ifdef DEBUG
- if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- else if (compiler->lvaIsGCTracked(varDsc))
- {
-#ifdef DEBUG
- if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
- {
- VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
- }
-#endif // DEBUG
- VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
- }
- }
-
- regSet.rsMaskVars = newLiveRegSet;
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- if (!VarSetOps::IsEmpty(compiler, addedGCVars))
- {
- printf("\t\t\t\t\t\t\tAdded GCVars: ");
- dumpConvertedVarSet(compiler, addedGCVars);
- printf("\n");
- }
- if (!VarSetOps::IsEmpty(compiler, removedGCVars))
- {
- printf("\t\t\t\t\t\t\tRemoved GCVars: ");
- dumpConvertedVarSet(compiler, removedGCVars);
- printf("\n");
- }
- }
-#endif // DEBUG
-
- gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
- gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
-
- /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
- represent the exception object (TYP_REF).
- We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
- to the block, it will be the first thing evaluated
- (thanks to GTF_ORDER_SIDEEFF).
- */
-
- if (handlerGetsXcptnObj(block->bbCatchTyp))
- {
- for (GenTree* node : LIR::AsRange(block))
- {
- if (node->OperGet() == GT_CATCH_ARG)
- {
- gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
- break;
- }
- }
- }
-
- /* Start a new code output block */
-
- genUpdateCurrentFunclet(block);
-
- if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
- {
- getEmitter()->emitLoopAlign();
- }
-
-#ifdef DEBUG
- if (compiler->opts.dspCode)
- {
- printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
- }
-#endif
-
- block->bbEmitCookie = nullptr;
-
- if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
- {
- /* Mark a label and update the current set of live GC refs */
-
- block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
- gcInfo.gcRegByrefSetCur, FALSE);
- }
-
- if (block == compiler->fgFirstColdBlock)
- {
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nThis is the start of the cold region of the method\n");
- }
-#endif
- // We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
-
- // We require the block that starts the Cold section to have a label
- noway_assert(block->bbEmitCookie);
- getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
- }
-
- /* Both stacks are always empty on entry to a basic block */
-
- genStackLevel = 0;
-
- savedStkLvl = genStackLevel;
-
- /* Tell everyone which basic block we're working on */
-
- compiler->compCurBB = block;
-
-#ifdef DEBUGGING_SUPPORT
- siBeginBlock(block);
-
- // BBF_INTERNAL blocks don't correspond to any single IL instruction.
- if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
- !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
- // emit a NO_MAPPING entry, immediately after the prolog.
- {
- genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
- }
-
- bool firstMapping = true;
-#endif // DEBUGGING_SUPPORT
-
- /*---------------------------------------------------------------------
- *
- * Generate code for each statement-tree in the block
- *
- */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if FEATURE_EH_FUNCLETS
- if (block->bbFlags & BBF_FUNCLET_BEG)
- {
- genReserveFuncletProlog(block);
- }
-#endif // FEATURE_EH_FUNCLETS
-
- // Clear compCurStmt and compCurLifeTree.
- compiler->compCurStmt = nullptr;
- compiler->compCurLifeTree = nullptr;
-
- // Traverse the block in linear order, generating code for each node as we
- // as we encounter it.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUGGING_SUPPORT
- IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
-#endif
- for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
- {
-#ifdef DEBUGGING_SUPPORT
- // Do we have a new IL offset?
- if (node->OperGet() == GT_IL_OFFSET)
- {
- genEnsureCodeEmitted(currentILOffset);
- currentILOffset = node->gtStmt.gtStmtILoffsx;
- genIPmappingAdd(currentILOffset, firstMapping);
- firstMapping = false;
- }
-#endif // DEBUGGING_SUPPORT
-
-#ifdef DEBUG
- if (node->OperGet() == GT_IL_OFFSET)
- {
- noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
- node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
-
- if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
- node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
- {
- while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
- {
- genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
- }
- }
- }
-#endif // DEBUG
-
- genCodeForTreeNode(node);
- if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
- {
- genConsumeReg(node);
- }
- } // end for each node in block
-
-#ifdef DEBUG
- // The following set of register spill checks and GC pointer tracking checks used to be
- // performed at statement boundaries. Now, with LIR, there are no statements, so they are
- // performed at the end of each block.
- // TODO: could these checks be performed more frequently? E.g., at each location where
- // the register allocator says there are no live non-variable registers. Perhaps this could
- // be done by (a) keeping a running count of live non-variable registers by using
- // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
- // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
- // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
- // there will be no live non-variable registers.
-
- regSet.rsSpillChk();
-
- /* Make sure we didn't bungle pointer register tracking */
-
- regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
- regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
-
- // If return is a GC-type, clear it. Note that if a common
- // epilog is generated (genReturnBB) it has a void return
- // even though we might return a ref. We can't use the compRetType
- // as the determiner because something we are tracking as a byref
- // might be used as a return value of a int function (which is legal)
- GenTree* blockLastNode = block->lastNode();
- if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
- (varTypeIsGC(compiler->info.compRetType) ||
- (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
- {
- nonVarPtrRegs &= ~RBM_INTRET;
- }
-
- if (nonVarPtrRegs)
- {
- printf("Regset after BB%02u gcr=", block->bbNum);
- printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
- printf(", byr=");
- printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
- printf(", regVars=");
- printRegMaskInt(regSet.rsMaskVars);
- compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
- printf("\n");
- }
-
- noway_assert(nonVarPtrRegs == RBM_NONE);
-#endif // DEBUG
-
-#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
- if (block->bbNext == nullptr)
- {
- // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
- // (it's as good as any, but better than the prolog, which can only be a single instruction
- // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
- // thinks the instructions are the same as we do.
- genAmd64EmitterUnitTests();
- }
-#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_)
-
-#ifdef DEBUGGING_SUPPORT
- // It is possible to reach the end of the block without generating code for the current IL offset.
- // For example, if the following IR ends the current block, no code will have been generated for
- // offset 21:
- //
- // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
- //
- // N001 ( 0, 0) [000039] ------------ nop void
- //
- // This can lead to problems when debugging the generated code. To prevent these issues, make sure
- // we've generated code for the last IL offset we saw in the block.
- genEnsureCodeEmitted(currentILOffset);
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (can not be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
- if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
{
- siEndBlock(block);
-
- /* Is this the last block, and are there any open scopes left ? */
-
- bool isLastBlockProcessed = (block->bbNext == nullptr);
- if (block->isBBCallAlwaysPair())
- {
- isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
- }
-
- if (isLastBlockProcessed && siOpenScopeList.scNext)
- {
- /* This assert no longer holds, because we may insert a throw
- block to demarcate the end of a try or finally region when they
- are at the end of the method. It would be nice if we could fix
- our code so that this throw block will no longer be necessary. */
-
- // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
-
- siCloseAllOpenScopes();
- }
+ // Fall-through.
+ // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
}
-
-#endif // DEBUGGING_SUPPORT
-
- genStackLevel -= savedStkLvl;
-
-#ifdef DEBUG
- // compCurLife should be equal to the liveOut set, except that we don't keep
- // it up to date for vars that are not register candidates
- // (it would be nice to have a xor set function)
-
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
- VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
- VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
- while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ else
{
- unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
- assert(!varDsc->lvIsRegCandidate());
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
}
-#endif
-
- /* Both stacks should always be empty on exit from a basic block */
- noway_assert(genStackLevel == 0);
-
-#ifdef _TARGET_AMD64_
- // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
- // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
- // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
- // The document "X64 and ARM ABIs.docx" has more details. The situations:
- // 1. If the call instruction is in a different EH region as the instruction that follows it.
- // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
- // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
- // here.)
- // We handle case #1 here, and case #2 in the emitter.
- if (getEmitter()->emitIsLastInsCall())
- {
- // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
- // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
- // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
- // generated before the OS epilog starts, such as a GS cookie check.
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- // We only need the NOP if we're not going to generate any more code as part of the block end.
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- case BBJ_THROW:
- case BBJ_CALLFINALLY:
- case BBJ_EHCATCHRET:
- // We're going to generate more code below anyway, so no need for the NOP.
-
- case BBJ_RETURN:
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- // These are the "epilog follows" case, handled in the emitter.
-
- break;
-
- case BBJ_NONE:
- if (block->bbNext == nullptr)
- {
- // Call immediately before the end of the code; we should never get here .
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- else
- {
- // We need the NOP
- instGen(INS_nop);
- }
- break;
-
- case BBJ_COND:
- case BBJ_SWITCH:
- // These can't have a call as the last instruction!
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
- }
- }
-#endif // _TARGET_AMD64_
-
- /* Do we need to generate a jump or return? */
-
- switch (block->bbJumpKind)
- {
- case BBJ_ALWAYS:
- inst_JMP(EJ_jmp, block->bbJumpDest);
- break;
-
- case BBJ_RETURN:
- genExitCode(block);
- break;
-
- case BBJ_THROW:
- // If we have a throw at the end of a function or funclet, we need to emit another instruction
- // afterwards to help the OS unwinder determine the correct context during unwind.
- // We insert an unexecuted breakpoint instruction in several situations
- // following a throw instruction:
- // 1. If the throw is the last instruction of the function or funclet. This helps
- // the OS unwinder determine the correct context during an unwind from the
- // thrown exception.
- // 2. If this is this is the last block of the hot section.
- // 3. If the subsequent block is a special throw block.
- // 4. On AMD64, if the next block is in a different EH region.
- if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
- !BasicBlock::sameEHRegion(block, block->bbNext) ||
- (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
- block->bbNext == compiler->fgFirstColdBlock)
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
-
- break;
-
- case BBJ_CALLFINALLY:
-
-#if FEATURE_EH_FUNCLETS
-
- // Generate a call to the finally, like this:
- // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
- // call finally-funclet
- // jmp finally-return // Only for non-retless finally calls
- // The jmp can be a NOP if we're going to the next block.
- // If we're generating code for the main function (not a funclet), and there is no localloc,
- // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
- // instead of loading the PSPSym in this case.
- if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
- {
- inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
- }
- else
- {
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
- }
- getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
+ getEmitter()->emitEnableGC();
+ }
- if (block->bbFlags & BBF_RETLESS_CALL)
- {
- // We have a retless call, and the last instruction generated was a call.
- // If the next block is in a different EH region (or is the end of the code
- // block), then we need to generate a breakpoint here (since it will never
- // get executed) to get proper unwind behavior.
+#else // !FEATURE_EH_FUNCLETS
- if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
- {
- instGen(INS_BREAKPOINT); // This should never get executed
- }
- }
- else
- {
- // Because of the way the flowgraph is connected, the liveness info for this one instruction
- // after the call is not (can not be) correct in cases where a variable has a last use in the
- // handler. So turn off GC reporting for this single instruction.
- getEmitter()->emitDisableGC();
+ // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+ // corresponding to the finally's nesting level. When invoked in response to an exception, the
+ // EE does this.
+ //
+ // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+ //
+ // We will emit :
+ // mov [ebp - (n + 1)], 0
+ // mov [ebp - n ], 0xFC
+ // push &step
+ // jmp finallyBlock
+ // ...
+ // step:
+ // mov [ebp - n ], 0
+ // jmp leaveTarget
+ // ...
+ // leaveTarget:
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ unsigned finallyNesting = 0;
+ compiler->fgGetNestingLevel(block, &finallyNesting);
- // Now go to where the finally funclet needs to return to.
- if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
- {
- // Fall-through.
- // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
- // to the next instruction? This would depend on stack walking from within the finally
- // handler working without this instruction being in this special EH region.
- instGen(INS_nop);
- }
- else
- {
- inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
- }
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
- getEmitter()->emitEnableGC();
- }
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-#else // !FEATURE_EH_FUNCLETS
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
- // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
- // corresponding to the finally's nesting level. When invoked in response to an exception, the
- // EE does this.
- //
- // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
- //
- // We will emit :
- // mov [ebp - (n + 1)], 0
- // mov [ebp - n ], 0xFC
- // push &step
- // jmp finallyBlock
- // ...
- // step:
- // mov [ebp - n ], 0
- // jmp leaveTarget
- // ...
- // leaveTarget:
-
- noway_assert(isFramePointerUsed());
-
- // Get the nesting level which contains the finally
- compiler->fgGetNestingLevel(block, &finallyNesting);
-
- // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
- unsigned filterEndOffsetSlotOffs;
- filterEndOffsetSlotOffs =
- (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
-
- unsigned curNestingSlotOffs;
- curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
-
- // Zero out the slot for the next nesting level
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs - TARGET_POINTER_SIZE);
- instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
- curNestingSlotOffs);
-
- // Now push the address where the finally funclet should return to directly.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
- getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
- }
- else
- {
- // EE expects a DWORD, so we give him 0
- inst_IV(INS_push_hide, 0);
- }
+ // Now push the address where the finally funclet should return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // EE expects a DWORD, so we give him 0
+ inst_IV(INS_push_hide, 0);
+ }
- // Jump to the finally BB
- inst_JMP(EJ_jmp, block->bbJumpDest);
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
#endif // !FEATURE_EH_FUNCLETS
- // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
- // jump target using bbJumpDest - that is already used to point
- // to the finally block. So just skip past the BBJ_ALWAYS unless the
- // block is RETLESS.
- if (!(block->bbFlags & BBF_RETLESS_CALL))
- {
- assert(block->isBBCallAlwaysPair());
-
- lblk = block;
- block = block->bbNext;
- }
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
- break;
+ lblk = block;
+ block = block->bbNext;
+ }
+ return block;
+}
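
The ShadowSP slot arithmetic in the non-funclet path above can be illustrated with a small standalone sketch (illustrative only, not part of the patch; the function name is hypothetical and the pointer size is assumed to be 4 on x86). The last slot of the shadow-SP area is reserved for ICodeManager::FixContext, and the slot for nesting level n sits (n + 1) pointers below it.

// Mirrors the offset computation in the non-funclet path of genCallFinally.
unsigned ComputeCurNestingSlotOffs(unsigned shadowSPslotsSize, unsigned finallyNesting)
{
    const unsigned pointerSize           = 4; // x86
    const unsigned filterEndOffsetSlotOffs = shadowSPslotsSize - pointerSize;
    return filterEndOffsetSlotOffs - ((finallyNesting + 1) * pointerSize);
}
// Example: a 16-byte shadow area at nesting level 0 gives filterEndOffsetSlotOffs = 12
// and curNestingSlotOffs = 8; the slot one pointer below (offset 4) is zeroed for the
// next nesting level, and LCL_FINALLY_MARK is stored at offset 8.
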
#if FEATURE_EH_FUNCLETS
-
- case BBJ_EHCATCHRET:
- // Set RAX to the address the VM should return to after the catch.
- // Generate a RIP-relative
- // lea reg, [rip + disp32] ; the RIP is implicit
- // which will be position-indepenent.
- getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
- __fallthrough;
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- genReserveFuncletEpilog(block);
- break;
+void CodeGen::genEHCatchRet(BasicBlock* block)
+{
+ // Set RAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-independent.
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+}
#else // !FEATURE_EH_FUNCLETS
- case BBJ_EHCATCHRET:
- noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
-
- case BBJ_EHFINALLYRET:
- case BBJ_EHFILTERRET:
- {
- // The last statement of the block must be a GT_RETFILT, which has already been generated.
- assert(block->lastNode() != nullptr);
- assert(block->lastNode()->OperGet() == GT_RETFILT);
-
- if (block->bbJumpKind == BBJ_EHFINALLYRET)
- {
- assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-
- // Return using a pop-jmp sequence. As the "try" block calls
- // the finally with a jmp, this leaves the x86 call-ret stack
- // balanced in the normal flow of path.
-
- noway_assert(isFramePointerRequired());
- inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
- inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
- }
- else
- {
- assert(block->bbJumpKind == BBJ_EHFILTERRET);
-
- // The return value has already been computed.
- instGen_Return(0);
- }
- }
- break;
-
-#endif // !FEATURE_EH_FUNCLETS
-
- case BBJ_NONE:
- case BBJ_COND:
- case BBJ_SWITCH:
- break;
-
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
- }
-
-#ifdef DEBUG
- compiler->compCurBB = nullptr;
-#endif
-
- } //------------------ END-FOR each block of the method -------------------
-
- /* Nothing is live at this point */
- genUpdateLife(VarSetOps::MakeEmpty(compiler));
-
- /* Finalize the spill tracking logic */
-
- regSet.rsSpillEnd();
-
- /* Finalize the temp tracking logic */
-
- compiler->tmpEnd();
+void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block)
+{
+ // The last statement of the block must be a GT_RETFILT, which has already been generated.
+ assert(block->lastNode() != nullptr);
+ assert(block->lastNode()->OperGet() == GT_RETFILT);
-#ifdef DEBUG
- if (compiler->verbose)
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
{
- printf("\n# ");
- printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
- compiler->compSizeEstimate);
- printf("%s\n", compiler->info.compFullName);
- }
-#endif
-}
+ assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
-// return the child that has the same reg as the dst (if any)
-// other child returned (out param) in 'other'
-GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
-{
- if (tree->gtRegNum == REG_NA)
- {
- other = nullptr;
- return nullptr;
- }
+ // Return using a pop-jmp sequence. As the "try" block calls
+ // the finally with a jmp, this leaves the x86 call-ret stack
+ // balanced in the normal flow path.
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- if (op1->gtRegNum == tree->gtRegNum)
- {
- other = op2;
- return op1;
- }
- if (op2->gtRegNum == tree->gtRegNum)
- {
- other = op1;
- return op2;
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
}
else
{
- other = nullptr;
- return nullptr;
+ assert(block->bbJumpKind == BBJ_EHFILTERRET);
+
+ // The return value has already been computed.
+ instGen_Return(0);
}
}
+#endif // !FEATURE_EH_FUNCLETS
+
// Move an immediate value into an integer register
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
@@ -1227,7 +493,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
- assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ if (treeNode->OperGet() == GT_MULHI)
+ {
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ }
assert(!treeNode->gtOverflowEx());
regNumber targetReg = treeNode->gtRegNum;
@@ -1247,8 +516,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
GenTree* rmOp = op2;
// Set rmOp to the contained memory operand (if any)
- //
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == REG_RAX)))
{
regOp = op2;
rmOp = op1;
@@ -1256,25 +524,131 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
assert(!regOp->isContained());
// Setup targetReg when neither of the source operands was a matching register
- if (regOp->gtRegNum != targetReg)
+ if (regOp->gtRegNum != REG_RAX)
{
- inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->gtRegNum, targetType);
}
- emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+ instruction ins;
+ if ((treeNode->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ ins = INS_imulEAX;
+ }
+ else
+ {
+ ins = INS_mulEAX;
+ }
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
// Move the result to the desired register, if necessary
- if (targetReg != REG_RDX)
+ if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX)
{
inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
}
}
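
The instruction selection above uses the one-operand widening multiply forms: imul (signed) and mul (unsigned) both leave the full product in the EDX:EAX pair, which is what GT_MUL_LONG consumes on x86, while GT_MULHI keeps only the high half from EDX/RDX. A minimal standalone sketch of that high half for 32-bit operands (illustrative only, not part of the patch; the helper names are hypothetical):

#include <cstdint>

// High 32 bits of the signed widening product, as left in EDX by one-operand imul.
static int32_t MulHiSigned(int32_t x, int32_t y)
{
    return static_cast<int32_t>((static_cast<int64_t>(x) * y) >> 32);
}

// High 32 bits of the unsigned widening product, as left in EDX by one-operand mul.
static uint32_t MulHiUnsigned(uint32_t x, uint32_t y)
{
    return static_cast<uint32_t>((static_cast<uint64_t>(x) * y) >> 32);
}
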
-// generate code for a DIV or MOD operation
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForLongUMod: Generate code for a tree of the form
+// `(umod (gt_long x y) (const int))`
+//
+// Arguments:
+// node - the node for which to generate code
+//
+void CodeGen::genCodeForLongUMod(GenTreeOp* node)
+{
+ assert(node != nullptr);
+ assert(node->OperGet() == GT_UMOD);
+ assert(node->TypeGet() == TYP_INT);
+
+ GenTreeOp* const dividend = node->gtOp1->AsOp();
+ assert(dividend->OperGet() == GT_LONG);
+ assert(varTypeIsLong(dividend));
+
+ genConsumeOperands(node);
+
+ GenTree* const dividendLo = dividend->gtOp1;
+ GenTree* const dividendHi = dividend->gtOp2;
+ assert(!dividendLo->isContained());
+ assert(!dividendHi->isContained());
+
+ GenTree* const divisor = node->gtOp2;
+ assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
+ assert(!divisor->gtSkipReloadOrCopy()->isContained());
+ assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
+ assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);
+
+ // dividendLo must be in EAX; dividendHi must be in EDX
+ genCopyRegIfNeeded(dividendLo, REG_EAX);
+ genCopyRegIfNeeded(dividendHi, REG_EDX);
+
+ // At this point, EDX:EAX contains the 64-bit dividend and op2->gtRegNum
+ // contains the 32-bit divisor. We want to generate the following code:
+ //
+ // cmp edx, divisor->gtRegNum
+ // jb noOverflow
+ //
+ // mov temp, eax
+ // mov eax, edx
+ // xor edx, edx
+ // div divisor->gtRegNum
+ // mov eax, temp
+ //
+ // noOverflow:
+ // div divisor->gtRegNum
+ //
+ // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c.
+
+ BasicBlock* const noOverflow = genCreateTempLabel();
+
+ // cmp edx, divisor->gtRegNum
+ // jb noOverflow
+ inst_RV_RV(INS_cmp, REG_EDX, divisor->gtRegNum);
+ inst_JMP(EJ_jb, noOverflow);
+
+ // mov temp, eax
+ // mov eax, edx
+ // xor edx, edx
+ // div divisor->gtRegNum
+ // mov eax, temp
+ const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs);
+ inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT);
+ inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
+ inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT);
+
+ // noOverflow:
+ // div divisor->gtRegNum
+ genDefineTempLabel(noOverflow);
+ inst_RV(INS_div, divisor->gtRegNum, TYP_INT);
+
+ const regNumber targetReg = node->gtRegNum;
+ if (targetReg != REG_EDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT);
+ }
+ genProduceReg(node);
+}
+#endif // _TARGET_X86_
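
The correctness argument in the comment above can be checked against a small standalone model of the emitted sequence (illustrative only, not part of the patch; the function name is hypothetical). The first div is needed because the x86 div instruction faults when the quotient of EDX:EAX / r32 does not fit in 32 bits, which is exactly the case hi >= divisor; reducing the high word first leaves the remainder unchanged.

#include <cstdint>

static uint32_t UMod64By32(uint32_t hi, uint32_t lo, uint32_t divisor)
{
    if (hi >= divisor) // the "jb noOverflow" branch is not taken
    {
        // (hi * 2^32 + lo) % c == ((hi % c) * 2^32 + lo) % c
        hi %= divisor; // models: xor edx,edx; div divisor (remainder left in edx)
    }
    const uint64_t dividend = (static_cast<uint64_t>(hi) << 32) | lo;
    return static_cast<uint32_t>(dividend % divisor); // models the final div
}
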
+
+//------------------------------------------------------------------------
+// genCodeForDivMod: Generate code for a DIV or MOD operation.
+//
+// Arguments:
+// treeNode - the node to generate the code for
//
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
- GenTree* dividend = treeNode->gtOp1;
+ GenTree* dividend = treeNode->gtOp1;
+#ifdef _TARGET_X86_
+ if (varTypeIsLong(dividend->TypeGet()))
+ {
+ genCodeForLongUMod(treeNode);
+ return;
+ }
+#endif // _TARGET_X86_
+
GenTree* divisor = treeNode->gtOp2;
genTreeOps oper = treeNode->OperGet();
emitAttr size = emitTypeSize(treeNode);
@@ -1319,10 +693,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
else
{
// dividend must be in RAX
- if (dividend->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
- }
+ genCopyRegIfNeeded(dividend, REG_RAX);
// zero or sign extend rax to rdx
if (oper == GT_UMOD || oper == GT_UDIV)
@@ -1395,7 +766,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else // !defined(_TARGET_64BIT_)
assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
- oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI ||
oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)
@@ -1443,7 +814,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
}
// now we know there are 3 different operands so attempt to use LEA
else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()))
+ && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags())
{
if (op2->isContainedIntOrIImmed())
{
@@ -1833,7 +1204,7 @@ void CodeGen::genReturn(GenTreePtr treeNode)
//
// Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
// In flowgraph and other places assert that the last node of a block marked as
- // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
// maintain such an invariant irrespective of whether profiler hook needed or not.
// Also, there is not much to be gained by materializing it as an explicit node.
if (compiler->compCurBB == compiler->genReturnBB)
@@ -1913,9 +1284,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
switch (treeNode->gtOper)
{
+#ifndef JIT32_GCENCODER
case GT_START_NONGC:
getEmitter()->emitDisableGC();
break;
+#endif // !defined(JIT32_GCENCODER)
case GT_PROF_HOOK:
#ifdef PROFILING_SUPPORTED
@@ -1996,14 +1369,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// genCodeForShift() calls genProduceReg()
break;
- case GT_CAST:
#if !defined(_TARGET_64BIT_)
- // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not
- // yet support casting FROM a long type either, and that's simpler to catch
- // here.
- NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
-#endif // !defined(_TARGET_64BIT_)
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
+ // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
+ // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+ // contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
+ genCodeForShiftLong(treeNode);
+ break;
+#endif
+ case GT_CAST:
if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
{
// Casts float/double <--> double/float
@@ -2037,7 +1414,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
{
- assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ assert(treeNode->InReg() || (treeNode->gtFlags & GTF_SPILLED));
}
// If this is a register candidate that has been spilled, genConsumeReg() will
@@ -2047,6 +1424,15 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
assert(!isRegCandidate);
+#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+ // Loading of TYP_SIMD12 (i.e. Vector3) variable
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadLclTypeSIMD12(treeNode);
+ break;
+ }
+#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+
emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
genProduceReg(treeNode);
@@ -2075,7 +1461,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// Loading of TYP_SIMD12 (i.e. Vector3) field
if (treeNode->TypeGet() == TYP_SIMD12)
{
- genLoadLclFldTypeSIMD12(treeNode);
+ genLoadLclTypeSIMD12(treeNode);
break;
}
#endif
@@ -2243,6 +1629,9 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_MULHI:
+#ifdef _TARGET_X86_
+ case GT_MUL_LONG:
+#endif
genCodeForMulHi(treeNode->AsOp());
genProduceReg(treeNode);
break;
@@ -2408,18 +1797,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// X86 Long comparison
else if (varTypeIsLong(op1Type))
{
- // When not materializing the result in a register, the compare logic is generated
- // when we generate the GT_JTRUE.
- if (treeNode->gtRegNum != REG_NA)
- {
- genCompareLong(treeNode);
- }
- else
- {
- // We generate the compare when we generate the GT_JTRUE, but we need to consume
- // the operands now.
- genConsumeOperands(treeNode->AsOp());
- }
+#ifdef DEBUG
+ // The result of an unlowered long compare on a 32-bit target must either be
+ // a) materialized into a register, or
+ // b) unused.
+ //
+ // A long compare that has a result that is used but not materialized into a register should
+ // have been handled by Lowering::LowerCompare.
+
+ LIR::Use use;
+ assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
+#endif
+ genCompareLong(treeNode);
}
#endif // !defined(_TARGET_64BIT_)
else
@@ -2437,52 +1826,60 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
#if !defined(_TARGET_64BIT_)
- // For long compares, we emit special logic
- if (varTypeIsLong(cmp->gtGetOp1()))
- {
- genJTrueLong(cmp);
- }
- else
+ // Long-typed compares should have been handled by Lowering::LowerCompare.
+ assert(!varTypeIsLong(cmp->gtGetOp1()));
#endif
- {
- // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
- // is governed by a flag NOT by the inherent type of the node
- // TODO-XArch-CQ: Check if we can use the currently set flags.
- emitJumpKind jumpKind[2];
- bool branchToTrueLabel[2];
- genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
- BasicBlock* skipLabel = nullptr;
- if (jumpKind[0] != EJ_NONE)
- {
- BasicBlock* jmpTarget;
- if (branchToTrueLabel[0])
- {
- jmpTarget = compiler->compCurBB->bbJumpDest;
- }
- else
- {
- // This case arises only for ordered GT_EQ right now
- assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
- skipLabel = genCreateTempLabel();
- jmpTarget = skipLabel;
- }
-
- inst_JMP(jumpKind[0], jmpTarget);
- }
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
- if (jumpKind[1] != EJ_NONE)
+ BasicBlock* skipLabel = nullptr;
+ if (jumpKind[0] != EJ_NONE)
+ {
+ BasicBlock* jmpTarget;
+ if (branchToTrueLabel[0])
{
- // the second conditional branch always has to be to the true label
- assert(branchToTrueLabel[1]);
- inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ jmpTarget = compiler->compCurBB->bbJumpDest;
}
-
- if (skipLabel != nullptr)
+ else
{
- genDefineTempLabel(skipLabel);
+ // This case arises only for ordered GT_EQ right now
+ assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+ skipLabel = genCreateTempLabel();
+ jmpTarget = skipLabel;
}
+
+ inst_JMP(jumpKind[0], jmpTarget);
+ }
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
}
+
+ if (skipLabel != nullptr)
+ {
+ genDefineTempLabel(skipLabel);
+ }
+ }
+ break;
+
+ case GT_JCC:
+ {
+ GenTreeJumpCC* jcc = treeNode->AsJumpCC();
+
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
+
+ inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
}
break;
@@ -2572,12 +1969,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
// Nothing to do
break;
case GT_PUTARG_STK:
- genPutArgStk(treeNode);
+ genPutArgStk(treeNode->AsPutArgStk());
break;
case GT_PUTARG_REG:
@@ -2608,7 +2006,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LOCKADD:
case GT_XCHG:
case GT_XADD:
- genLockedInstructions(treeNode);
+ genLockedInstructions(treeNode->AsOp());
break;
case GT_MEMORYBARRIER:
@@ -2795,7 +2193,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
#ifdef DEBUG
char message[256];
- sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n",
+ GenTree::NodeName(treeNode->OperGet()));
#endif
assert(!"Unknown node in codegen");
}
@@ -3330,8 +2729,10 @@ ALLOC_DONE:
BAILOUT:
// Write the lvaLocAllocSPvar stack frame slot
- noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
- getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+ }
#if STACK_PROBES
if (compiler->opts.compNeedStackProbes)
@@ -3356,10 +2757,15 @@ BAILOUT:
void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
{
+#ifdef JIT32_GCENCODER
+ assert(!storeBlkNode->gtBlkOpGcUnsafe);
+#else
if (storeBlkNode->gtBlkOpGcUnsafe)
{
getEmitter()->emitDisableGC();
}
+#endif // JIT32_GCENCODER
+
bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
switch (storeBlkNode->gtBlkOpKind)
@@ -3399,23 +2805,40 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
default:
unreached();
}
+
+#ifndef JIT32_GCENCODER
if (storeBlkNode->gtBlkOpGcUnsafe)
{
getEmitter()->emitEnableGC();
}
+#endif // !defined(JIT32_GCENCODER)
}
-// Generate code for InitBlk using rep stos.
+//
+//------------------------------------------------------------------------
+// genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos.
+//
+// Arguments:
+// initBlkNode - The Block store for which we are generating code.
+//
// Preconditions:
-// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
-// Any value larger than that, we'll use the helper even if both the
-// fill byte and the size are integer constants.
+// On x64:
+// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes.
+// Any value larger than that, we'll use the helper even if both the fill byte and the
+// size are integer constants.
+// On x86:
+// The size must either be a non-constant or less than INITBLK_STOS_LIMIT bytes.
+//
void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
{
- // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers.
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
#ifdef DEBUG
assert(!dstAddr->isContained());
@@ -3428,7 +2851,8 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
#ifdef _TARGET_AMD64_
assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
#else
- assert(size > CPBLK_UNROLL_LIMIT);
+ // Note that a size of zero means a non-constant size.
+ assert((size == 0) || (size > CPBLK_UNROLL_LIMIT));
#endif
}
@@ -3449,9 +2873,13 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(!initVal->isContained() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
assert(size != 0);
assert(size <= INITBLK_UNROLL_LIMIT);
assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
@@ -3512,9 +2940,11 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
offset += 4;
emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
offset += 4;
-#else // !_TARGET_X86_
+#else // !_TARGET_X86_
+
emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
offset += 8;
+
#endif // !_TARGET_X86_
}
if ((size & 4) != 0)
@@ -3544,6 +2974,10 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
unsigned blockSize = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
assert(!dstAddr->isContained());
assert(!initVal->isContained());
@@ -3760,21 +3194,145 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
instGen(INS_r_movsb);
}
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+//------------------------------------------------------------------------
+// CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// longTmpReg - The tmp register to be used for the long value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (8 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
+{
+#ifdef _TARGET_X86_
+ instruction longMovIns = INS_movq;
+#else // !_TARGET_X86_
+ instruction longMovIns = INS_mov;
+#endif // !_TARGET_X86_
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_LONG, longTmpReg, offset);
+ return 8;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (4 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_INT, intTmpReg, offset);
+ return 4;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (2 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset);
+ return 2;
+ }
+ return 0;
+}
+
+//------------------------------------------------------------------------
+// CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area
+//
+// Arguments:
+// size - The size of bytes remaining to be moved
+// intTmpReg - The tmp register to be used for the value
+// srcAddr - The address of the source struct
+// offset - The current offset being copied
+//
+// Return Value:
+// Returns the number of bytes moved (1 or 0).
+//
+// Notes:
+// This is used in the PutArgStkKindUnroll case, to move any bytes that are
+// not an even multiple of 16.
+// intTmpReg must be an integer register.
+// This is checked by genStoreRegToStackArg.
+//
+int CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+{
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset);
+ genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset);
+ return 1;
+ }
+ return 0;
+}
//---------------------------------------------------------------------------------------------------------------//
// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
//
// Arguments:
// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
+//
+// Notes:
+// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the
+// stack.
//
// TODO-Amd64-Unix: Try to share code with copyblk.
// Need refactoring of copyblk before it could be used for putarg_stk.
// The difference for now is that a putarg_stk contains its children, while cpyblk does not.
// This creates differences in code. After some significant refactoring it could be reused.
//
-void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
{
// We will never call this method for SIMD types, which are stored directly
// in genPutStructArgStk().
@@ -3801,14 +3359,43 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
unsigned offset = 0;
+ regNumber xmmTmpReg = REG_NA;
+ regNumber intTmpReg = REG_NA;
+ regNumber longTmpReg = REG_NA;
+#ifdef _TARGET_X86_
+ // On x86 we use an XMM register for both 16- and 8-byte chunks, but if the
+ // total size is less than 16 bytes, we will just use pushes.
+ if (size >= 8)
+ {
+ xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ longTmpReg = xmmTmpReg;
+ }
+ if ((size & 0x7) != 0)
+ {
+ intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ }
+#else // !_TARGET_X86_
+ // On x64 we use an XMM register only for 16-byte chunks.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ }
+ if ((size & 0xf) != 0)
+ {
+ intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ longTmpReg = intTmpReg;
+ }
+#endif // !_TARGET_X86_
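+ // Note: any of the temp registers above may legitimately remain REG_NA; each one is
+ // looked up only when the corresponding chunk size will actually be copied below.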
+
// If the size of this struct is at least 16 bytes,
// let's use SSE2 to be able to do 16-byte-at-a-time
// loads and stores.
if (size >= XMM_REGSIZE_BYTES)
{
+#ifdef _TARGET_X86_
+ assert(!m_pushStkArg);
+#endif // _TARGET_X86_
assert(putArgNode->gtRsvdRegs != RBM_NONE);
- regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
- assert(genIsValidFloatReg(xmmReg));
size_t slots = size / XMM_REGSIZE_BYTES;
assert(putArgNode->gtGetOp1()->isContained());
@@ -3820,11 +3407,10 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
while (slots-- > 0)
{
// Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
- offset); // Load the address of the child of the Obj node.
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset);
// Store
- emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
+ genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);
offset += XMM_REGSIZE_BYTES;
}
@@ -3833,41 +3419,29 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
// Fill the remainder (15 bytes or less) if there's one.
if ((size & 0xf) != 0)
{
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
- assert(genIsValidIntReg(tmpReg));
-
- if ((size & 8) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 8;
- }
-
- if ((size & 4) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 4;
- }
-
- if ((size & 2) != 0)
- {
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
-
- emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
-
- offset += 2;
+#ifdef _TARGET_X86_
+ if (m_pushStkArg)
+ {
+ // This case is currently supported only when the total size is less than
+ // XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse order.
+ // However, morph has ensured that the struct size is an even multiple of
+ // TARGET_POINTER_SIZE, so we don't need to worry about alignment.
+ assert(((size & 0xc) == size) && (offset == 0));
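+ // That is, 'size' must be 4, 8, or 12, and nothing has been copied yet.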
+ // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on
+ // whether we've got an 8 byte chunk, and then push it on the stack.
+ unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, size & 0x8);
+ // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk)
+ // and push it on the stack.
+ pushedBytes += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, 0);
}
-
- if ((size & 1) != 0)
+ else
+#endif // _TARGET_X86_
{
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
- emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
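+ // Copy the remainder in descending chunk sizes (8, 4, 2, 1 bytes); each helper
+ // copies its chunk only when the corresponding bit is set in 'size'.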
+ offset += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove2IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ offset += genMove1IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset);
+ assert(offset == size);
}
}
}
@@ -3877,17 +3451,16 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV
//
// Arguments:
// putArgNode - the PutArgStk tree.
-// baseVarNum - the base var number, relative to which the by-val struct bits will go.
//
// Preconditions:
// The size argument of the PutArgStk (for structs) is a constant and is between
// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go.
//
-void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode)
{
assert(putArgNode->TypeGet() == TYP_STRUCT);
assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
- assert(baseVarNum != BAD_VAR_NUM);
// Make sure we got the arguments of the cpblk operation in the right registers
GenTreePtr dstAddr = putArgNode;
@@ -3897,7 +3470,7 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
assert(srcAddr->isContained());
- genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX);
instGen(INS_r_movsb);
}
@@ -3906,12 +3479,14 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base
// must be cleared to zeroes. The native compiler doesn't clear the upper bits
// and there is no way to know if the caller is native or not. So, the upper
// 32 bits of a Vector3 argument on the stack are always cleared to zero.
-#ifdef FEATURE_SIMD
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
void CodeGen::genClearStackVec3ArgUpperBits()
{
#ifdef DEBUG
if (verbose)
+ {
printf("*************** In genClearStackVec3ArgUpperBits()\n");
+ }
#endif
assert(compiler->compGeneratingProlog);
@@ -3948,12 +3523,13 @@ void CodeGen::genClearStackVec3ArgUpperBits()
}
}
}
-#endif // FEATURE_SIMD
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+#endif // FEATURE_PUT_STRUCT_ARG_STK
// Generate code for CpObj nodes which copy structs that have interleaved
// GC pointers.
-// This will generate a sequence of movsq instructions for the cases of non-gc members
+// This will generate a sequence of movsp instructions for the cases of non-gc members
+// (note that movsp is an alias for movsd on x86 and movsq on x64)
// and calls to the BY_REF_ASSIGN helper otherwise.
void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
@@ -3961,6 +3537,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
GenTreePtr dstAddr = cpObjNode->Addr();
GenTreePtr source = cpObjNode->Data();
GenTreePtr srcAddr = nullptr;
+ var_types srcAddrType = TYP_BYREF;
bool sourceIsLocal = false;
assert(source->isContained());
@@ -3973,24 +3550,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
noway_assert(source->IsLocal());
sourceIsLocal = true;
- // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
- // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
- if (source->OperGet() == GT_LCL_VAR)
- {
- source->SetOper(GT_LCL_VAR_ADDR);
- }
- else
- {
- assert(source->OperGet() == GT_LCL_FLD);
- source->SetOper(GT_LCL_FLD_ADDR);
- }
- srcAddr = source;
}
bool dstOnStack = dstAddr->OperIsLocalAddr();
#ifdef DEBUG
- bool isRepMovsqUsed = false;
+ bool isRepMovspUsed = false;
assert(!dstAddr->isContained());
@@ -3998,44 +3563,40 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
// with CpObj, so this requires special logic.
assert(cpObjNode->gtGcPtrCount > 0);
- // MovSq instruction is used for copying non-gcref fields and it needs
- // src = RSI and dst = RDI.
+ // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields
+ // and it needs src = RSI and dst = RDI.
// Either these registers must not contain lclVars, or they must be dying or marked for spill.
// This is because these registers are incremented as we go through the struct.
- GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
- GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
- unsigned srcLclVarNum = BAD_VAR_NUM;
- unsigned dstLclVarNum = BAD_VAR_NUM;
- bool isSrcAddrLiveOut = false;
- bool isDstAddrLiveOut = false;
- if (genIsRegCandidateLocal(actualSrcAddr))
- {
- srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
- isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- if (genIsRegCandidateLocal(actualDstAddr))
- {
- dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
- isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
- }
- assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
- assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
- ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+ if (!sourceIsLocal)
+ {
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+ GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+ unsigned srcLclVarNum = BAD_VAR_NUM;
+ unsigned dstLclVarNum = BAD_VAR_NUM;
+ bool isSrcAddrLiveOut = false;
+ bool isDstAddrLiveOut = false;
+ if (genIsRegCandidateLocal(actualSrcAddr))
+ {
+ srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+ isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ if (genIsRegCandidateLocal(actualDstAddr))
+ {
+ dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+ isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+ assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+ srcAddrType = srcAddr->TypeGet();
+ }
#endif // DEBUG
- // Consume these registers.
+ // Consume the operands and get them into the right registers.
// They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- if (sourceIsLocal)
- {
- inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
- }
- else
- {
- genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
- }
- gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType);
gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
unsigned slots = cpObjNode->gtSlots;
@@ -4046,23 +3607,23 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
#ifdef DEBUG
- // If the destination of the CpObj is on the stack
- // make sure we allocated RCX to emit rep movsq.
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
+ // If the destination of the CpObj is on the stack, make sure we allocated
+ // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively).
+ assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
+ regNumber tmpReg = REG_RCX;
+ isRepMovspUsed = true;
#endif // DEBUG
getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
- instGen(INS_r_movsq);
+ instGen(INS_r_movsp);
}
else
{
- // For small structs, it's better to emit a sequence of movsq than to
- // emit a rep movsq instruction.
+ // For small structs, it's better to emit a sequence of movsp than to
+ // emit a rep movsp instruction.
while (slots > 0)
{
- instGen(INS_movsq);
+ instGen(INS_movsp);
slots--;
}
}
@@ -4078,7 +3639,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
switch (gcPtrs[i])
{
case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // Let's see if we can use rep movsp instead of a sequence of movsp instructions
// to save cycles and code size.
{
unsigned nonGcSlotCount = 0;
@@ -4090,12 +3651,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
} while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
// If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
+ // emit a sequence of movsp instructions
if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
while (nonGcSlotCount > 0)
{
- instGen(INS_movsq);
+ instGen(INS_movsp);
nonGcSlotCount--;
}
}
@@ -4103,13 +3664,13 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
{
#ifdef DEBUG
// Otherwise, we can save code-size and improve CQ by emitting
- // rep movsq
- regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
- assert(tmpReg == REG_RCX);
- isRepMovsqUsed = true;
+ // rep movsp (alias for movsd/movsq for x86/x64)
+ assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0);
+ regNumber tmpReg = REG_RCX;
+ isRepMovspUsed = true;
#endif // DEBUG
getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- instGen(INS_r_movsq);
+ instGen(INS_r_movsp);
}
}
break;
@@ -4235,7 +3796,7 @@ void CodeGen::genJumpTable(GenTree* treeNode)
// generate code for the locked operations:
// GT_LOCKADD, GT_XCHG, GT_XADD
-void CodeGen::genLockedInstructions(GenTree* treeNode)
+void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
GenTree* data = treeNode->gtOp.gtOp2;
GenTree* addr = treeNode->gtOp.gtOp1;
@@ -4244,11 +3805,6 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
regNumber addrReg = addr->gtRegNum;
instruction ins;
- // all of these nodes implicitly do an indirection on op1
- // so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- genConsumeReg(addr);
-
// The register allocator should have extended the lifetime of the address
// so that it is not used as the target.
noway_assert(addrReg != targetReg);
@@ -4258,7 +3814,7 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
(data->gtFlags & GTF_VAR_DEATH) != 0);
- genConsumeIfReg(data);
+ genConsumeOperands(treeNode);
if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
{
inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
@@ -4284,6 +3840,10 @@ void CodeGen::genLockedInstructions(GenTree* treeNode)
default:
unreached();
}
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
if (treeNode->gtRegNum != REG_NA)
@@ -4459,22 +4019,22 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
GenTreePtr arrObj = arrOffset->gtArrObj;
regNumber tgtReg = arrOffset->gtRegNum;
-
- noway_assert(tgtReg != REG_NA);
+ assert(tgtReg != REG_NA);
unsigned dim = arrOffset->gtCurrDim;
unsigned rank = arrOffset->gtArrRank;
var_types elemType = arrOffset->gtArrElemType;
- // We will use a temp register for the offset*scale+effectiveIndex computation.
- regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
- regNumber tmpReg = genRegNumFromMask(tmpRegMask);
-
// First, consume the operands in the correct order.
regNumber offsetReg = REG_NA;
+ regNumber tmpReg = REG_NA;
if (!offsetNode->IsIntegralConst(0))
{
offsetReg = genConsumeReg(offsetNode);
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ tmpReg = genRegNumFromMask(tmpRegMask);
}
else
{
@@ -4495,6 +4055,9 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
if (!offsetNode->IsIntegralConst(0))
{
+ assert(tmpReg != REG_NA);
+ assert(arrReg != REG_NA);
+
// Evaluate tgtReg = offsetReg*dim_size + indexReg.
// tmpReg is used to load dim_size and the result of the multiplication.
// Note that dim_size will never be negative.
@@ -4617,6 +4180,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_SUB_HI:
ins = INS_sbb;
break;
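+ // GT_LSH_HI and GT_RSH_LO are produced when decomposing 64-bit shifts for
+ // 32-bit targets; shld/shrd are the double-precision shift instructions.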
+ case GT_LSH_HI:
+ ins = INS_shld;
+ break;
+ case GT_RSH_LO:
+ ins = INS_shrd;
+ break;
#endif // !defined(_TARGET_64BIT_)
default:
unreached();
@@ -4654,6 +4223,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
regNumber operandReg = operand->gtRegNum;
GenTreePtr shiftBy = tree->gtGetOp2();
+
if (shiftBy->isContainedIntOrIImmed())
{
// First, move the operand to the destination register and
@@ -4672,12 +4242,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
// We must have the number of bits to shift stored in ECX, since we constrained this node to
// sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
// register destination requirement.
- regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
+ genCopyRegIfNeeded(shiftBy, REG_RCX);
// The operand to be shifted must not be in ECX
noway_assert(operandReg != REG_RCX);
@@ -4692,6 +4257,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
genProduceReg(tree);
}
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// genCodeForShiftLong: Generates the code sequence for a GenTree node that
+// represents a three operand bit shift or rotate operation (<<Hi, >>Lo).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
+//
+void CodeGen::genCodeForShiftLong(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ genTreeOps oper = tree->OperGet();
+ assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
+
+ GenTree* operand = tree->gtOp.gtOp1;
+ assert(operand->OperGet() == GT_LONG);
+ assert(!operand->gtOp.gtOp1->isContained());
+ assert(!operand->gtOp.gtOp2->isContained());
+
+ GenTree* operandLo = operand->gtGetOp1();
+ GenTree* operandHi = operand->gtGetOp2();
+
+ regNumber regLo = operandLo->gtRegNum;
+ regNumber regHi = operandHi->gtRegNum;
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+
+ assert(shiftBy->isContainedIntOrIImmed());
+
+ unsigned int count = shiftBy->AsIntConCommon()->IconValue();
+
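+ // The result of <<Hi is computed from the hi half of the source operand;
+ // the result of >>Lo from the lo half.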
+ regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
+
+ if (regResult != tree->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
+ }
+
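+ // shld shifts the result left and fills the vacated low bits from regLo;
+ // shrd shifts it right and fills the vacated high bits from regHi.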
+ if (oper == GT_LSH_HI)
+ {
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count);
+ }
+ else
+ {
+ assert(oper == GT_RSH_LO);
+ inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count);
+ }
+
+ genProduceReg(tree);
+}
+#endif // _TARGET_X86_
+
//------------------------------------------------------------------------
// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
@@ -4739,182 +4365,13 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
// sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single
// register destination requirement.
regNumber shiftReg = shiftBy->gtRegNum;
- if (shiftReg != REG_RCX)
- {
- // Issue the mov to RCX:
- inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
- }
+ genCopyRegIfNeeded(shiftBy, REG_RCX);
// The shiftBy operand is implicit, so call the unary version of emitInsRMW.
getEmitter()->emitInsRMW(ins, attr, storeInd);
}
}
-void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
-{
- regNumber dstReg = tree->gtRegNum;
- GenTree* unspillTree = tree;
-
- if (tree->gtOper == GT_RELOAD)
- {
- unspillTree = tree->gtOp.gtOp1;
- }
-
- if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
- {
- if (genIsRegCandidateLocal(unspillTree))
- {
- // Reset spilled flag, since we are going to load a local variable from its home location.
- unspillTree->gtFlags &= ~GTF_SPILLED;
-
- GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // Load local variable from its home location.
- // In most cases the tree type will indicate the correct type to use for the load.
- // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
- // widened when loaded into a register), and its size is not the same as genActualType of
- // the type of the lclVar, then we need to change the type of the tree node when loading.
- // This situation happens due to "optimizations" that avoid a cast and
- // simply retype the node when using long type lclVar as an int.
- // While loading the int in that case would work for this use of the lclVar, if it is
- // later used as a long, we will have incorrectly truncated the long.
- // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
- // extending load.
-
- var_types treeType = unspillTree->TypeGet();
- if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
- {
- assert(!varTypeIsGC(varDsc));
- var_types spillType = genActualType(varDsc->lvType);
- unspillTree->gtType = spillType;
- inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- unspillTree->gtType = treeType;
- }
- else
- {
- inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
- }
-
- unspillTree->SetInReg();
-
- // TODO-Review: We would like to call:
- // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
- // instead of the following code, but this ends up hitting this assert:
- // assert((regSet.rsMaskVars & regMask) == 0);
- // due to issues with LSRA resolution moves.
- // So, just force it for now. This probably indicates a condition that creates a GC hole!
- //
- // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
- // because the variable is not really going live or dead, but that method is somewhat poorly
- // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
- // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
-
- // Don't update the variable's location if we are just re-spilling it again.
-
- if ((unspillTree->gtFlags & GTF_SPILL) == 0)
- {
- genUpdateVarReg(varDsc, tree);
-#ifdef DEBUG
- if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
- {
- JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
- }
-#endif // DEBUG
- VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
- varDsc->PrintVarReg();
- printf(" is becoming live ");
- compiler->printTreeID(unspillTree);
- printf("\n");
- }
-#endif // DEBUG
-
- regSet.AddMaskVars(genGetRegMask(varDsc));
- }
-
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- else if (unspillTree->IsMultiRegCall())
- {
- GenTreeCall* call = unspillTree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
- GenTreeCopyOrReload* reloadTree = nullptr;
- if (tree->OperGet() == GT_RELOAD)
- {
- reloadTree = tree->AsCopyOrReload();
- }
-
- // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
- // one or more of its result regs are spilled. Call node needs to be
- // queried to know which specific result regs to be unspilled.
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILLED) != 0)
- {
- var_types dstType = retTypeDesc->GetReturnRegType(i);
- regNumber unspillTreeReg = call->GetRegNumByIdx(i);
-
- if (reloadTree != nullptr)
- {
- dstReg = reloadTree->GetRegNumByIdx(i);
- if (dstReg == REG_NA)
- {
- dstReg = unspillTreeReg;
- }
- }
- else
- {
- dstReg = unspillTreeReg;
- }
-
- TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
- getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
- 0);
- compiler->tmpRlsTemp(t);
- gcInfo.gcMarkRegPtrVal(dstReg, dstType);
- }
- }
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- }
- else
- {
- TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
- getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
- t->tdTempNum(), 0);
- compiler->tmpRlsTemp(t);
-
- unspillTree->gtFlags &= ~GTF_SPILLED;
- unspillTree->SetInReg();
- gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
- }
- }
-}
-
-// Do Liveness update for a subnodes that is being consumed by codegen
-// including the logic for reload in case is needed and also takes care
-// of locating the value on the desired register.
-void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
-{
- if (needReg == REG_NA)
- {
- return;
- }
- regNumber treeReg = genConsumeReg(tree);
- if (treeReg != needReg)
- {
- inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
- }
-}
-
void CodeGen::genRegCopy(GenTree* treeNode)
{
assert(treeNode->OperGet() == GT_COPY);
@@ -5022,662 +4479,6 @@ void CodeGen::genRegCopy(GenTree* treeNode)
genProduceReg(treeNode);
}
-// Check that registers are consumed in the right order for the current node being generated.
-#ifdef DEBUG
-void CodeGen::genCheckConsumeNode(GenTree* treeNode)
-{
- // GT_PUTARG_REG is consumed out of order.
- if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
- {
- if (lastConsumedNode != nullptr)
- {
- if (treeNode == lastConsumedNode)
- {
- if (verbose)
- {
- printf("Node was consumed twice:\n ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- }
- else
- {
- if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
- {
- printf("Nodes were consumed out-of-order:\n");
- compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
- }
- // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
- }
- }
- lastConsumedNode = treeNode;
- }
-}
-#endif // DEBUG
-
-//--------------------------------------------------------------------
-// genConsumeReg: Do liveness update for a subnode that is being
-// consumed by codegen.
-//
-// Arguments:
-// tree - GenTree node
-//
-// Return Value:
-// Returns the reg number of tree.
-// In case of multi-reg call node returns the first reg number
-// of the multi-reg return.
-regNumber CodeGen::genConsumeReg(GenTree* tree)
-{
- if (tree->OperGet() == GT_COPY)
- {
- genRegCopy(tree);
- }
-
- // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
- // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
- // always using GT_COPY to make the lclVar location explicit.
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
- // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
- // the lclVar (normally when a lclVar is spilled it is then used from its former register
- // location, which matches the gtRegNum on the node).
- // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
- // because if it's on the stack it will always get reloaded into tree->gtRegNum).
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
- {
- inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
- }
- }
-
- genUnspillRegIfNeeded(tree);
-
- // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
- genUpdateLife(tree);
-
- assert(tree->gtHasReg());
-
- // there are three cases where consuming a reg means clearing the bit in the live mask
- // 1. it was not produced by a local
- // 2. it was produced by a local that is going dead
- // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
-
- if (genIsRegCandidateLocal(tree))
- {
- GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
- assert(varDsc->lvLRACandidate);
-
- if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
- {
- gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
- }
- else if (varDsc->lvRegNum == REG_STK)
- {
- // We have loaded this into a register only temporarily
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
- }
-
- genCheckConsumeNode(tree);
- return tree->gtRegNum;
-}
-
-// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
-void CodeGen::genConsumeAddress(GenTree* addr)
-{
- if (!addr->isContained())
- {
- genConsumeReg(addr);
- }
- else if (addr->OperGet() == GT_LEA)
- {
- genConsumeAddrMode(addr->AsAddrMode());
- }
-}
-
-// do liveness update for a subnode that is being consumed by codegen
-void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
-{
- genConsumeOperands(addr);
-}
-
-void CodeGen::genConsumeRegs(GenTree* tree)
-{
-#if !defined(_TARGET_64BIT_)
- if (tree->OperGet() == GT_LONG)
- {
- genConsumeRegs(tree->gtGetOp1());
- genConsumeRegs(tree->gtGetOp2());
- return;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- if (tree->isContained())
- {
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
- {
- genConsumeAddress(tree->AsIndir()->Addr());
- }
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::LowerCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
- else if (tree->OperGet() == GT_LCL_VAR)
- {
- // A contained lcl var must be living on stack and marked as reg optional.
- unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
- LclVarDsc* varDsc = compiler->lvaTable + varNum;
-
- noway_assert(varDsc->lvRegNum == REG_STK);
- noway_assert(tree->IsRegOptional());
-
- // Update the life of reg optional lcl var.
- genUpdateLife(tree);
- }
- else
- {
- assert(tree->OperIsLeaf());
- }
- }
- else
- {
- genConsumeReg(tree);
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
-//
-// Arguments:
-// tree - the GenTreeOp whose operands will have their liveness updated.
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Note that this logic is localized here because we must do the liveness update in
-// the correct execution order. This is important because we may have two operands
-// that involve the same lclVar, and if one is marked "lastUse" we must handle it
-// after the first.
-
-void CodeGen::genConsumeOperands(GenTreeOp* tree)
-{
- GenTree* firstOp = tree->gtOp1;
- GenTree* secondOp = tree->gtOp2;
- if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
- {
- assert(secondOp != nullptr);
- firstOp = secondOp;
- secondOp = tree->gtOp1;
- }
- if (firstOp != nullptr)
- {
- genConsumeRegs(firstOp);
- }
- if (secondOp != nullptr)
- {
- genConsumeRegs(secondOp);
- }
-}
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-//------------------------------------------------------------------------
-// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
-// Also loads in the right register the addresses of the
-// src/dst for rep mov operation.
-//
-// Arguments:
-// putArgNode - the PUTARG_STK tree.
-// dstReg - the dstReg for the rep move operation.
-// srcReg - the srcReg for the rep move operation.
-// sizeReg - the sizeReg for the rep move operation.
-// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
-//
-// Return Value:
-// None.
-//
-// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
-// for copying on the stack a struct with references.
-// The source address/offset is determined from the address on the GT_OBJ node, while
-// the destination address is the address contained in 'baseVarNum' plus the offset
-// provided in the 'putArgNode'.
-
-void CodeGen::genConsumePutStructArgStk(
- GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
-{
- assert(varTypeIsStruct(putArgNode));
- assert(baseVarNum != BAD_VAR_NUM);
-
- // The putArgNode children are always contained. We should not consume any registers.
- assert(putArgNode->gtGetOp1()->isContained());
-
- GenTree* dstAddr = putArgNode;
-
- // Get the source address.
- GenTree* src = putArgNode->gtGetOp1();
- assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
- GenTree* srcAddr = src->gtGetOp1();
-
- size_t size = putArgNode->getArgSize();
-
- assert(dstReg != REG_NA);
- assert(srcReg != REG_NA);
-
- // Consume the registers only if they are not contained or set to REG_NA.
- if (srcAddr->gtRegNum != REG_NA)
- {
- genConsumeReg(srcAddr);
- }
-
- // If the op1 is already in the dstReg - nothing to do.
- // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
- if (dstAddr->gtRegNum != dstReg)
- {
- // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area
- // for tail calls) in RDI.
- // Destination is always local (on the stack) - use EA_PTRSIZE.
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
- }
-
- if (srcAddr->gtRegNum != srcReg)
- {
- if (srcAddr->OperIsLocalAddr())
- {
- // The OperLocalAddr is always contained.
- assert(srcAddr->isContained());
- GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
-
- // Generate LEA instruction to load the LclVar address in RSI.
- // Source is known to be on the stack. Use EA_PTRSIZE.
- unsigned int offset = 0;
- if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
- {
- offset = srcAddr->AsLclFld()->gtLclOffs;
- }
- getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
- }
- else
- {
- assert(srcAddr->gtRegNum != REG_NA);
- // Source is not known to be on the stack. Use EA_BYREF.
- getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
- }
- }
-
- if (sizeReg != REG_NA)
- {
- inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
- }
-}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-
-//------------------------------------------------------------------------
-// genConsumeBlockSize: Ensure that the block size is in the given register
-//
-// Arguments:
-// blkNode - The block node
-// sizeReg - The register into which the block's size should go
-//
-
-void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
-{
- if (sizeReg != REG_NA)
- {
- unsigned blockSize = blkNode->Size();
- if (blockSize != 0)
- {
- assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
- genSetRegToIcon(sizeReg, blockSize);
- }
- else
- {
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockDst: Ensure that the block destination address is in its
-// allocated register.
-// Arguments:
-// blkNode - The block node
-//
-
-void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- genConsumeReg(dstAddr);
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockSrc: Ensure that the block source address is in its
-// allocated register if it is non-local.
-// Arguments:
-// blkNode - The block node
-//
-// Return Value:
-// Returns the source address node, if it is non-local,
-// and nullptr otherwise.
-
-GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- if (blkNode->OperIsCopyBlkOp())
- {
- // For a CopyBlk we need the address of the source.
- if (src->OperGet() == GT_IND)
- {
- src = src->gtOp.gtOp1;
- }
- else
- {
- // This must be a local.
- // For this case, there is no source address register, as it is a
- // stack-based address.
- assert(src->OperIsLocal());
- return nullptr;
- }
- }
- genConsumeReg(src);
- return src;
-}
-
-//------------------------------------------------------------------------
-// genConsumeBlockOp: Ensure that the block's operands are enregistered
-// as needed.
-// Arguments:
-// blkNode - The block node
-//
-// Notes:
-// This ensures that the operands are consumed in the proper order to
-// obey liveness modeling.
-
-void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
-{
- // We have to consume the registers, and perform any copies, in the actual execution order.
- // The nominal order is: dst, src, size. However this may have been changed
- // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
- // block size.
- // Note that the register allocator ensures that the registers ON THE NODES will not interfere
- // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
- // Further, it ensures that they will not interfere with one another if they are then copied
- // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
- // then, that we first consume all the operands, then do any necessary moves.
-
- GenTree* dstAddr = blkNode->Addr();
- GenTree* src = nullptr;
- unsigned blockSize = blkNode->Size();
- GenTree* size = nullptr;
- bool evalSizeFirst = true;
-
- if (blkNode->OperGet() == GT_STORE_DYN_BLK)
- {
- evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
- size = blkNode->AsDynBlk()->gtDynamicSize;
- }
-
- // First, consusme all the sources in order
- if (evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- if (blkNode->IsReverseOp())
- {
- src = genConsumeBlockSrc(blkNode);
- genConsumeBlockDst(blkNode);
- }
- else
- {
- genConsumeBlockDst(blkNode);
- src = genConsumeBlockSrc(blkNode);
- }
- if (!evalSizeFirst)
- {
- genConsumeBlockSize(blkNode, sizeReg);
- }
- // Next, perform any necessary moves.
- if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
- if (blkNode->IsReverseOp())
- {
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- }
- else
- {
- if (dstAddr->gtRegNum != dstReg)
- {
- inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
- }
- if ((src != nullptr) && (src->gtRegNum != srcReg))
- {
- inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
- }
- }
- if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
- {
- inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
- }
-}
-
-//-------------------------------------------------------------------------
-// genProduceReg: do liveness update for register produced by the current
-// node in codegen.
-//
-// Arguments:
-// tree - Gentree node
-//
-// Return Value:
-// None.
-void CodeGen::genProduceReg(GenTree* tree)
-{
- if (tree->gtFlags & GTF_SPILL)
- {
- // Code for GT_COPY node gets generated as part of consuming regs by its parent.
- // A GT_COPY node in turn produces reg result and it should never be marked to
- // spill.
- //
- // Similarly GT_RELOAD node gets generated as part of consuming regs by its
- // parent and should never be marked for spilling.
- noway_assert(!tree->IsCopyOrReload());
-
- if (genIsRegCandidateLocal(tree))
- {
- // Store local variable to its home location.
- tree->gtFlags &= ~GTF_REG_VAL;
- // Ensure that lclVar stores are typed correctly.
- unsigned varNum = tree->gtLclVarCommon.gtLclNum;
- assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
- (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
- inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
- }
- else
- {
- // In case of multi-reg call node, spill flag on call node
- // indicates that one or more of its allocated regs need to
- // be spilled. Call node needs to be further queried to
- // know which of its result regs needs to be spilled.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- unsigned flags = call->GetRegSpillFlagByIdx(i);
- if ((flags & GTF_SPILL) != 0)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- call->SetInReg();
- regSet.rsSpillTree(reg, call, i);
- gcInfo.gcMarkRegSetNpt(genRegMask(reg));
- }
- }
- }
- else
- {
- tree->SetInReg();
- regSet.rsSpillTree(tree->gtRegNum, tree);
- gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
- }
-
- tree->gtFlags |= GTF_SPILLED;
- tree->gtFlags &= ~GTF_SPILL;
-
- return;
- }
- }
-
- genUpdateLife(tree);
-
- // If we've produced a register, mark it as a pointer, as needed.
- if (tree->gtHasReg())
- {
- // We only mark the register in the following cases:
- // 1. It is not a register candidate local. In this case, we're producing a
- // register from a local, but the local is not a register candidate. Thus,
- // we must be loading it as a temp register, and any "last use" flag on
- // the register wouldn't be relevant.
- // 2. The register candidate local is going dead. There's no point to mark
- // the register as live, with a GC pointer, if the variable is dead.
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
- {
- // Multi-reg call node will produce more than one register result.
- // Mark all the regs produced by call node.
- if (tree->IsMultiRegCall())
- {
- GenTreeCall* call = tree->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- regNumber reg = call->GetRegNumByIdx(i);
- var_types type = retTypeDesc->GetReturnRegType(i);
- gcInfo.gcMarkRegPtrVal(reg, type);
- }
- }
- else if (tree->IsCopyOrReloadOfMultiRegCall())
- {
- // we should never see reload of multi-reg call here
- // because GT_RELOAD gets generated in reg consuming path.
- noway_assert(tree->OperGet() == GT_COPY);
-
- // A multi-reg GT_COPY node produces those regs to which
- // copy has taken place.
- GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
- GenTreeCall* call = copy->gtGetOp1()->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = retTypeDesc->GetReturnRegCount();
-
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = retTypeDesc->GetReturnRegType(i);
- regNumber fromReg = call->GetRegNumByIdx(i);
- regNumber toReg = copy->GetRegNumByIdx(i);
-
- if (toReg != REG_NA)
- {
- gcInfo.gcMarkRegPtrVal(toReg, type);
- }
- }
- }
- else
- {
- gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
- }
- }
- }
- tree->SetInReg();
-}
-
-// transfer gc/byref status of src reg to dst reg
-void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
-{
- regMaskTP srcMask = genRegMask(src);
- regMaskTP dstMask = genRegMask(dst);
-
- if (gcInfo.gcRegGCrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetGCref(dstMask);
- }
- else if (gcInfo.gcRegByrefSetCur & srcMask)
- {
- gcInfo.gcMarkRegSetByref(dstMask);
- }
- else
- {
- gcInfo.gcMarkRegSetNpt(dstMask);
- }
-}
-
-// generates an ip-relative call or indirect call via reg ('call reg')
-// pass in 'addr' for a relative call or 'base' for a indirect register call
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
- emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
-}
-
-// generates an indirect call via addressing mode (call []) given an indir node
-// methHnd - optional, only used for pretty printing
-// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
-void CodeGen::genEmitCall(int callType,
- CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset)
-{
-#if !defined(_TARGET_X86_)
- ssize_t argSize = 0;
-#endif // !defined(_TARGET_X86_)
- genConsumeAddress(indir->Addr());
-
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
-}
-
//------------------------------------------------------------------------
// genStoreInd: Generate code for a GT_STOREIND node.
//
@@ -5724,16 +4525,10 @@ void CodeGen::genStoreInd(GenTreePtr node)
noway_assert(data->gtRegNum != REG_ARG_0);
// addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_ARG_0)
- {
- inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
- }
+ genCopyRegIfNeeded(addr, REG_ARG_0);
// data goes in REG_ARG_1
- if (data->gtRegNum != REG_ARG_1)
- {
- inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
- }
+ genCopyRegIfNeeded(data, REG_ARG_1);
genGCWriteBarrier(storeInd, writeBarrierForm);
}
@@ -5821,6 +4616,23 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(rmwSrc == data->gtGetOp2());
genCodeForShiftRMW(storeInd);
}
+ else if (!compiler->opts.compDbgCode && data->OperGet() == GT_ADD &&
+ (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
+ {
+ // Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
+ //
+ // Notes:
+ // 1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1).
+ // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into
+ // an assert while decoding ModR/M byte of "inc dword ptr [rax]".
+ // It is not clear whether Decode() can handle all possible
+ // addr modes with inc/dec. For this reason, inc/dec [mem]
+ // is not generated while generating debuggable code. Update
+ // the above if condition once Decode() routine is fixed.
+ assert(rmwSrc->isContainedIntOrIImmed());
+ instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec;
+ getEmitter()->emitInsRMW(ins, emitTypeSize(storeInd), storeInd);
+ }
else
{
// generate code for remaining binary RMW memory ops like add/sub/and/or/xor
@@ -5905,10 +4717,7 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri
// call write_barrier_helper_reg
// addr goes in REG_ARG_0
- if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register?
- {
- inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
- }
+ genCopyRegIfNeeded(addr, REG_WRITE_BARRIER);
unsigned tgtAnywhere = 0;
if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
@@ -5943,10 +4752,28 @@ void CodeGen::genCallInstruction(GenTreePtr node)
// all virtuals should have been expanded into a control expression
assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+ // Insert a GS check if necessary
+ if (call->IsTailCallViaHelper())
+ {
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // If either of the conditions below is true, we will need a temporary register in order to perform the GS
+ // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using
+ // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now,
+ // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this
+ // case.
+ assert(compiler->gsGlobalSecurityCookieAddr == nullptr);
+ assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal);
+#endif
+ genEmitGSCookieCheck(true);
+ }
+ }
+
// Consume all the arg regs
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
@@ -5960,13 +4787,13 @@ void CodeGen::genCallInstruction(GenTreePtr node)
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_LIST)
+ if (argNode->OperGet() == GT_FIELD_LIST)
{
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+ unsigned iterationNum = 0;
+ for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), iterationNum++)
{
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ GenTreePtr putArgRegNode = fieldListPtr->gtOp.gtOp1;
assert(putArgRegNode->gtOper == GT_PUTARG_REG);
regNumber argReg = REG_NA;
@@ -6036,20 +4863,34 @@ void CodeGen::genCallInstruction(GenTreePtr node)
{
assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
}
+ if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
+ {
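+ // The arg table entry records how many stack slots a field list arg occupies.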
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+ stackArgBytes += curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+ }
+ else
#endif // defined(_TARGET_X86_)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
{
assert(arg->OperGet() == GT_PUTARG_STK);
- GenTreeObj* obj = arg->gtGetOp1()->AsObj();
- stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
+ GenTreeObj* obj = arg->gtGetOp1()->AsObj();
+ unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE);
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes);
+#endif // DEBUG
+ stackArgBytes += argBytes;
}
else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#endif // FEATURE_PUT_STRUCT_ARG_STK
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+ }
}
args = args->gtOp.gtOp2;
}
@@ -6098,10 +4939,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
assert(target != nullptr);
genConsumeReg(target);
- if (target->gtRegNum != REG_RAX)
- {
- inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
- }
+ genCopyRegIfNeeded(target, REG_RAX);
return;
}
@@ -6141,7 +4979,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
bool fPossibleSyncHelperCall = false;
CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
-#ifdef DEBUGGING_SUPPORT
// We need to propagate the IL offset information to the call instruction, so we can emit
// an IL to native mapping record for the call, to support managed return value debugging.
// We don't want tail call helper calls that were converted from normal calls to get a record,
@@ -6150,7 +4987,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
{
(void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
}
-#endif // DEBUGGING_SUPPORT
#if defined(_TARGET_X86_)
// If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
@@ -6167,7 +5003,38 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (target != nullptr)
{
- if (target->isContainedIndir())
+#ifdef _TARGET_X86_
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+
+ assert(REG_VIRTUAL_STUB_PARAM == REG_VIRTUAL_STUB_TARGET);
+
+ assert(target->isContainedIndir());
+ assert(target->OperGet() == GT_IND);
+
+ GenTree* addr = target->AsIndir()->Addr();
+ assert(!addr->isContained());
+
+ genConsumeReg(addr);
+ genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
+
+ getEmitter()->emitIns_Nop(3);
+ getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSizeForEmitter,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0);
+ }
+ else
+#endif // _TARGET_X86_
+ if (target->isContainedIndir())
{
if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
{
@@ -6977,8 +5844,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genConsumeOperands(tree);
- assert(targetReg != REG_NA);
-
GenTreePtr loOp1 = op1->gtGetOp1();
GenTreePtr hiOp1 = op1->gtGetOp2();
GenTreePtr loOp2 = op2->gtGetOp1();
@@ -6992,6 +5857,12 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
// Emit the compare instruction
getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
+ // If the result is not being materialized in a register, we're done.
+ if (targetReg == REG_NA)
+ {
+ return;
+ }
+
// Generate the first jump for the high compare
CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
@@ -7015,10 +5886,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
inst_SET(jumpKindLo, targetReg);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
- genProduceReg(tree);
-
inst_JMP(EJ_jmp, labelFinal);
// Define the label for hi jump target here. If we have jumped here, we want to set
@@ -7027,11 +5894,10 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genDefineTempLabel(labelHi);
inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
+ genDefineTempLabel(labelFinal);
// Set the higher bytes to 0
inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
genProduceReg(tree);
-
- genDefineTempLabel(labelFinal);
}
else
{
@@ -7062,152 +5928,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode)
genProduceReg(tree);
}
}
-
-//------------------------------------------------------------------------
-// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
-// is not manifested in a register.
-//
-// Arguments:
-// treeNode - the compare tree
-//
-// Return Value:
-// None.
-// Comments:
-// For long compares, we need to compare the high parts of operands first, then the low parts.
-// We only have to do the low compare if the high parts of the operands are equal.
-//
-// In the case where the result of a rel-op is not realized in a register, we generate:
-//
-// Opcode x86 equivalent Comment
-// ------ -------------- -------
-//
-// GT_LT; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; unsigned cmp hiOp1,hiOp2
-// jb trueLabel
-// ja falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; unsigned cmp hiOp1,hiOp2
-// ja trueLabel
-// jb falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_LT; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jb trueLabel
-// falseLabel:
-//
-// GT_LE; signed cmp hiOp1,hiOp2
-// jl trueLabel
-// jg falseLabel
-// cmp loOp1,loOp2
-// jbe trueLabel
-// falseLabel:
-//
-// GT_GT; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// ja trueLabel
-// falseLabel:
-//
-// GT_GE; signed cmp hiOp1,hiOp2
-// jg trueLabel
-// jl falseLabel
-// cmp loOp1,loOp2
-// jae trueLabel
-// falseLabel:
-//
-// GT_EQ; cmp hiOp1,hiOp2
-// jne falseLabel
-// cmp loOp1,loOp2
-// je trueLabel
-// falseLabel:
-//
-// GT_NE; cmp hiOp1,hiOp2
-// jne labelTrue
-// cmp loOp1,loOp2
-// jne trueLabel
-// falseLabel:
-//
-// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
-void CodeGen::genJTrueLong(GenTreePtr treeNode)
-{
- assert(treeNode->OperIsCompare());
-
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
-
- assert(varTypeIsLong(op1->TypeGet()));
- assert(varTypeIsLong(op2->TypeGet()));
-
- regNumber targetReg = treeNode->gtRegNum;
-
- assert(targetReg == REG_NA);
-
- GenTreePtr loOp1 = op1->gtGetOp1();
- GenTreePtr hiOp1 = op1->gtGetOp2();
- GenTreePtr loOp2 = op2->gtGetOp1();
- GenTreePtr hiOp2 = op2->gtGetOp2();
-
- // Emit the compare instruction
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
-
- // Generate the first jump for the high compare
- CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
-
- // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
- BasicBlock* falseLabel = genCreateTempLabel();
-
- emitJumpKind jumpKindHi[2];
-
- // Generate the jumps for the high compare
- genJumpKindsForTreeLongHi(tree, jumpKindHi);
-
- BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
-
- if (jumpKindHi[0] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[0], trueLabel);
- }
-
- if (jumpKindHi[1] != EJ_NONE)
- {
- inst_JMP(jumpKindHi[1], falseLabel);
- }
-
- // The low jump must be unsigned
- emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
-
- // Emit the comparison and the jump to the trueLabel
- getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
-
- inst_JMP(jumpKindLo, trueLabel);
-
- // Generate falseLabel, which is the false path. We will jump here if the high compare is false
- // or fall through if the low compare is false.
- genDefineTempLabel(falseLabel);
-}
#endif //! defined(_TARGET_64BIT_)
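As a plain-C++ cross-check of the ordering described in the removed genJTrueLong table above (the high halves decide the result with the original signedness; the low halves are consulted only when the high halves are equal, and always unsigned), independent of any JIT code:

    #include <cstdint>

    // True iff a < b for signed 64-bit values, computed from 32-bit halves the
    // way the generated two-compare sequence does.
    bool LongLessThan(int64_t a, int64_t b)
    {
        int32_t  hiA = (int32_t)(a >> 32), hiB = (int32_t)(b >> 32);
        uint32_t loA = (uint32_t)a,        loB = (uint32_t)b;

        if (hiA != hiB)
        {
            return hiA < hiB; // signed high compare decides
        }
        return loA < loB;     // equal highs: unsigned low compare decides
    }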
//------------------------------------------------------------------------
@@ -7339,19 +6059,77 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
{
assert(treeNode->OperIsCompare());
- GenTreeOp* tree = treeNode->AsOp();
- GenTreePtr op1 = tree->gtOp1;
- GenTreePtr op2 = tree->gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+
+ // Case of op1 == 0 or op1 != 0:
+ // Optimize generation of 'test' instruction if op1 sets flags.
+ //
+ // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before
+ // op1, it will not modify the flags set by codegen of op1.
+ // Similarly op1 could also be reg-optional at its use and
+ // it was spilled after producing its result in a register.
+ // Spill code too will not modify the flags set by op1.
+ GenTree* realOp1 = op1->gtSkipReloadOrCopy();
+ if (realOp1->gtSetFlags())
+ {
+ // op1 must set ZF and SF flags
+ assert(realOp1->gtSetZSFlags());
+
+ // Must be (in)equality against zero.
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+ assert(op2->IsIntegralConst(0));
+ assert(op2->isContained());
+
+ // Just consume the operands
+ genConsumeOperands(tree);
+
+ // No need to generate test instruction since
+ // op1 sets flags
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+
+ return;
+ }
+
+#ifdef FEATURE_SIMD
+ // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
+ // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
+ // would set or clear Zero flag.
+ if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE))
+ {
+ // Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
+ if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
+ {
+ // Must be comparing against true or false.
+ assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1));
+ assert(op2->isContainedIntOrIImmed());
+
+ // In this case SIMD (in)Equality will set or clear
+ // Zero flag, based on which GT_JTRUE would generate
+ // the right conditional jump.
+ return;
+ }
+ }
+#endif // FEATURE_SIMD
genConsumeOperands(tree);
instruction ins;
emitAttr cmpAttr;
- regNumber targetReg = treeNode->gtRegNum;
- assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
+ // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm.
+ // https://github.com/dotnet/coreclr/issues/7270
    assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
assert(!varTypeIsFloating(op2Type));
#ifdef _TARGET_X86_
@@ -7387,7 +6165,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
{
// Do we have a short compare against a constant in op2?
//
- // We checked for this case in LowerCmp() and if we can perform a small
+ // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small
// compare immediate we labeled this compare with a GTF_RELOP_SMALL
// and for unsigned small non-equality compares the GTF_UNSIGNED flag.
//
@@ -7442,12 +6220,11 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
if (op1->isContained())
{
// op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::LowerCmp()
+ // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
//
- if ((op1->OperGet() == GT_AND))
+ if ((op1->OperGet() == GT_AND) && op1->gtGetOp2()->isContainedIntOrIImmed() &&
+ ((tree->OperGet() == GT_EQ) || (tree->OperGet() == GT_NE)))
{
- noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
-
ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
op1 = op1->gtOp.gtOp1; // overwrite op1
@@ -7561,6 +6338,93 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
}
}
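A hedged restatement of the early-out added at the top of genCompareInt above (not the actual helper, just the condition under which the explicit test instruction can be elided, using the same GenTree accessors):

    // Sketch only: a compare against zero can reuse the flags already produced
    // by the operand when all of the following hold.
    bool CanReuseFlagsForZeroCompare(GenTreeOp* cmp)
    {
        GenTree* op1 = cmp->gtOp1->gtSkipReloadOrCopy();
        GenTree* op2 = cmp->gtOp2;
        return (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE) &&
               op2->IsIntegralConst(0) && op2->isContained() && op1->gtSetFlags();
    }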
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// genLongToIntCast: Generate code for long to int casts on x86.
+//
+// Arguments:
+// cast - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The cast node and its sources (via GT_LONG) must have been assigned registers.
+// The destination cannot be a floating point type or a small integer type.
+//
+void CodeGen::genLongToIntCast(GenTree* cast)
+{
+ assert(cast->OperGet() == GT_CAST);
+
+ GenTree* src = cast->gtGetOp1();
+ noway_assert(src->OperGet() == GT_LONG);
+
+ genConsumeRegs(src);
+
+ var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
+ var_types dstType = cast->CastToType();
+ regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
+ regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
+ regNumber dstReg = cast->gtRegNum;
+
+ assert((dstType == TYP_INT) || (dstType == TYP_UINT));
+ assert(genIsValidIntReg(loSrcReg));
+ assert(genIsValidIntReg(hiSrcReg));
+ assert(genIsValidIntReg(dstReg));
+
+ if (cast->gtOverflow())
+ {
+ //
+ // Generate an overflow check for [u]long to [u]int casts:
+ //
+ // long -> int - check if the upper 33 bits are all 0 or all 1
+ //
+ // ulong -> int - check if the upper 33 bits are all 0
+ //
+ // long -> uint - check if the upper 32 bits are all 0
+ // ulong -> uint - check if the upper 32 bits are all 0
+ //
+
+ if ((srcType == TYP_LONG) && (dstType == TYP_INT))
+ {
+ BasicBlock* allOne = genCreateTempLabel();
+ BasicBlock* success = genCreateTempLabel();
+
+ inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ inst_JMP(EJ_js, allOne);
+
+ inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, success);
+
+ genDefineTempLabel(allOne);
+ inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+
+ genDefineTempLabel(success);
+ }
+ else
+ {
+ if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
+ {
+ inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW);
+ }
+
+ inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ }
+ }
+
+ if (dstReg != loSrcReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
+ }
+
+ genProduceReg(cast);
+}
+#endif
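To make the "upper 33 bits" wording above concrete, here is the same long to int range check written as ordinary C++ (illustration only, independent of the emitter):

    #include <cstdint>

    bool FitsInInt32(int64_t value)
    {
        uint32_t lo = (uint32_t)value;
        int32_t  hi = (int32_t)(value >> 32);
        // If the low half's sign bit is clear, the high half must be 0; if it is
        // set, the high half must be all ones. Together: the upper 33 bits are
        // all 0 or all 1.
        return ((lo & 0x80000000u) == 0) ? (hi == 0) : (hi == -1);
    }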
+
//------------------------------------------------------------------------
// genIntToIntCast: Generate code for an integer cast
// This method handles integer overflow checking casts
@@ -7584,13 +6448,22 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode)
{
assert(treeNode->OperGet() == GT_CAST);
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
- var_types dstType = treeNode->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- var_types srcType = genActualType(castOp->TypeGet());
- bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ var_types srcType = genActualType(castOp->TypeGet());
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(srcType))
+ {
+ genLongToIntCast(treeNode);
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+ var_types dstType = treeNode->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
// if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
@@ -7948,7 +6821,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && targetReg == op1->gtRegNum)
+ if (srcType == dstType && (!op1->isContained() && (targetReg == op1->gtRegNum)))
{
        // source and destination types are the same and also reside in the same register.
// we just need to consume and produce the reg in this case.
@@ -7999,7 +6872,8 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
#if !defined(_TARGET_64BIT_)
- NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
+ // We expect morph to replace long to float/double casts with helper calls
+ noway_assert(!varTypeIsLong(srcType));
#endif // !defined(_TARGET_64BIT_)
// Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
@@ -8225,27 +7099,27 @@ void CodeGen::genCkfinite(GenTreePtr treeNode)
//
// For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
// movaps targetReg, op1->gtRegNum
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
// movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
//
// For TYP_DOUBLE with (targetReg == op1->gtRegNum):
- // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
- // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
- // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
+ // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
//
// For TYP_FLOAT, it's the same as _TARGET_64BIT_:
- // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
// and tmpReg, <mask>
// cmp tmpReg, <mask>
// je <throw block>
- // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
+ // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
@@ -8613,7 +7487,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
#if FEATURE_FIXED_OUT_ARGS
baseVarNum = compiler->lvaOutgoingArgSpaceVar;
#else // !FEATURE_FIXED_OUT_ARGS
- NYI_X86("Stack args for x86/RyuJIT");
+ assert(!"No BaseVarForPutArgStk on x86");
baseVarNum = BAD_VAR_NUM;
#endif // !FEATURE_FIXED_OUT_ARGS
}
@@ -8621,8 +7495,74 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
return baseVarNum;
}
-//--------------------------------------------------------------------- //
-// genPutStructArgStk - generate code for passing an arg on the stack.
+#ifdef _TARGET_X86_
+//---------------------------------------------------------------------
+// genAdjustStackForPutArgStk:
+// adjust the stack pointer for a putArgStk node if necessary.
+//
+// Arguments:
+// putArgStk - the putArgStk node.
+//
+// Returns: true if the stack pointer was adjusted; false otherwise.
+//
+bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
+{
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(putArgStk))
+ {
+ const unsigned argSize = genTypeSize(putArgStk);
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
+ genStackLevel += argSize;
+ m_pushStkArg = false;
+ return true;
+ }
+#endif // FEATURE_SIMD
+
+ const unsigned argSize = putArgStk->getArgSize();
+
+ // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack.
+ // This is set in Lowering, and is true if and only if:
+ // - This argument contains any GC pointers OR
+ // - It is a GT_FIELD_LIST OR
+ // - It is less than 16 bytes in size.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ case GenTreePutArgStk::Kind::Unroll:
+ assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) &&
+ (argSize >= 16));
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) ||
+ (argSize < 16));
+ break;
+ case GenTreePutArgStk::Kind::Invalid:
+ default:
+ assert(!"Uninitialized GenTreePutArgStk::Kind");
+ break;
+ }
+#endif // DEBUG
+
+ if (putArgStk->isPushKind())
+ {
+ m_pushStkArg = true;
+ return false;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
+ genStackLevel += argSize;
+ return true;
+ }
+}
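The DEBUG switch above is a consistency check against the rule lowering applies; restated as a standalone predicate with illustrative names (this is not the lowering code):

    // Push-style stores are chosen when GC references must be reported slot by
    // slot, when individual fields are stored, or when the argument is small
    // enough that a few pushes beat a sub-esp-plus-stores sequence.
    bool UsesPushKind(unsigned gcRefSlots, bool isFieldList, unsigned argSize)
    {
        return (gcRefSlots != 0) || isFieldList || (argSize < 16);
    }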
+
+//---------------------------------------------------------------------
+// genPutArgStkFieldList - generate code for passing an arg on the stack.
//
// Arguments
+//    putArgStk - the GT_PUTARG_STK node
@@ -8631,25 +7571,224 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
// Return value:
// None
//
-void CodeGen::genPutArgStk(GenTreePtr treeNode)
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
- var_types targetType = treeNode->TypeGet();
+ GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList();
+ assert(fieldList != nullptr);
+
+ // Set m_pushStkArg and pre-adjust the stack if necessary.
+ const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+ // For now, we only support the "push" case; we will push a full slot for the first field of each slot
+ // within the struct.
+ assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
+
+    // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
+ // (Note that this mode is not currently being used.)
+ // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
+ // in reverse order, so we start with the current field offset at the size of the struct arg (which must be
+ // a multiple of the target pointer size).
+ unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
+ unsigned prevFieldOffset = currentOffset;
+ regNumber tmpReg = REG_NA;
+ if (putArgStk->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+ }
+ for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ var_types fieldType = current->gtFieldType;
+
+ // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the
+ // field list in descending order by offset.
+ assert(!varTypeIsLong(fieldType));
+ assert(fieldOffset <= prevFieldOffset);
+
+ // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately
+ // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been
+ // assigned a register, and which is therefore contained.
+ // Unlike genConsumeReg(), it handles the case where no registers are being consumed.
+ genConsumeRegs(fieldNode);
+ regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+
+ // If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
+ //
+ // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up
+ // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must
+ // not require rounding.
+ // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise
+ // able to detect stores into the outgoing argument area of the stack on x86.
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
+ int adjustment = roundUp(currentOffset - fieldOffset, 4);
+ if (fieldIsSlot)
+ {
+ fieldType = genActualType(fieldType);
+ unsigned pushSize = genTypeSize(fieldType);
+ assert((pushSize % 4) == 0);
+ adjustment -= pushSize;
+ while (adjustment != 0)
+ {
+ inst_IV(INS_push, 0);
+ currentOffset -= pushSize;
+ genStackLevel += pushSize;
+ adjustment -= pushSize;
+ }
+ m_pushStkArg = true;
+ }
+ else
+ {
+ m_pushStkArg = false;
+ // We always "push" floating point fields (i.e. they are full slot values that don't
+ // require special handling).
+ assert(varTypeIsIntegralOrI(fieldNode));
+ // If we can't push this field, it needs to be in a register so that we can store
+ // it to the stack location.
+ assert(tmpReg != REG_NA);
+ if (adjustment != 0)
+ {
+ // This moves the stack pointer to fieldOffset.
+ // For this case, we must adjust the stack and generate stack-relative stores rather than pushes.
+ // Adjust the stack pointer to the next slot boundary.
+ inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
+ currentOffset -= adjustment;
+ genStackLevel += adjustment;
+ }
+
+ // Does it need to be in a byte register?
+ // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If it's already in a register, but not a byteable one, then move it.
+ if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
+ {
+ noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ if (argReg != REG_NA)
+ {
+ inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
+ argReg = tmpReg;
+ }
+ }
+ }
+
+ if (argReg == REG_NA)
+ {
+ if (m_pushStkArg)
+ {
+ if (fieldNode->isContainedSpillTemp())
+ {
+ assert(fieldNode->IsRegOptional());
+ TempDsc* tmp = getSpillTempDsc(fieldNode);
+ getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
+ compiler->tmpRlsTemp(tmp);
+ }
+ else
+ {
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet()));
+ break;
+ case GT_CNS_INT:
+ if (fieldNode->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal);
+ }
+ break;
+ default:
+ unreached();
+ }
+ }
+ currentOffset -= TARGET_POINTER_SIZE;
+ genStackLevel += TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ assert(varTypeIsIntegralOrI(fieldNode));
+ switch (fieldNode->OperGet())
+ {
+ case GT_LCL_VAR:
+ inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ break;
+ case GT_CNS_INT:
+ genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ break;
+ default:
+ unreached();
+ }
+ genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ }
+ }
+ else
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ if (m_pushStkArg)
+ {
+ // We always push a slot-rounded size
+ currentOffset -= genTypeSize(fieldType);
+ }
+ }
+
+ prevFieldOffset = fieldOffset;
+ }
+ if (currentOffset != 0)
+ {
+ // We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
+ inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
+ genStackLevel += currentOffset;
+ }
+}
+#endif // _TARGET_X86_
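A toy, self-contained illustration of why the field loop above visits offsets in descending order when pushing: each push lowers the stack pointer by one slot, so pushing the highest offset first leaves slot i of the struct at esp + 4*i (nothing below is JIT code):

    #include <cstdio>

    int main()
    {
        unsigned structSlots[3] = {0x11, 0x22, 0x33}; // values at offsets 0, 4, 8
        unsigned stack[8];
        unsigned sp = 8;                              // stack grows downward

        for (int slot = 2; slot >= 0; --slot)
        {
            stack[--sp] = structSlots[slot];          // "push" highest offset first
        }
        for (unsigned i = 0; i < 3; i++)
        {
            printf("[esp+%u] = 0x%x\n", i * 4, stack[sp + i]); // 0x11, 0x22, 0x33
        }
        return 0;
    }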
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+//    putArgStk - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
+{
+ var_types targetType = putArgStk->TypeGet();
+
#ifdef _TARGET_X86_
- noway_assert(targetType != TYP_STRUCT);
+
+#ifdef FEATURE_SIMD
+ if (targetType == TYP_SIMD12)
+ {
+ genPutArgStkSIMD12(putArgStk);
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ if (varTypeIsStruct(targetType))
+ {
+ (void)genAdjustStackForPutArgStk(putArgStk);
+ genPutStructArgStk(putArgStk);
+ return;
+ }
// The following logic is applicable for x86 arch.
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+ assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
- GenTreePtr data = treeNode->gtOp.gtOp1;
+ GenTreePtr data = putArgStk->gtOp1;
// On a 32-bit target, all of the long arguments have been decomposed into
// a separate putarg_stk for each of the upper and lower halves.
noway_assert(targetType != TYP_LONG);
- int argSize = genTypeSize(genActualType(targetType));
- genStackLevel += argSize;
+ const unsigned argSize = putArgStk->getArgSize();
+ assert((argSize % TARGET_POINTER_SIZE) == 0);
- // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
if (data->isContainedIntOrIImmed())
{
if (data->IsIconHandle())
@@ -8660,53 +7799,50 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
{
inst_IV(INS_push, data->gtIntCon.gtIconVal);
}
+ genStackLevel += argSize;
}
- else if (data->isContained())
+ else if (data->OperGet() == GT_FIELD_LIST)
{
- NYI_X86("Contained putarg_stk of non-constant");
+ genPutArgStkFieldList(putArgStk);
}
else
{
+ // We should not see any contained nodes that are not immediates.
+ assert(!data->isContained());
genConsumeReg(data);
- if (varTypeIsIntegralOrI(targetType))
- {
- inst_RV(INS_push, data->gtRegNum, targetType);
- }
- else
- {
- // Decrement SP.
- inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
- getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
- }
+ genPushReg(targetType, data->gtRegNum);
}
#else // !_TARGET_X86_
{
- unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+ unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk);
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(targetType))
{
- genPutStructArgStk(treeNode, baseVarNum);
+ m_stkArgVarNum = baseVarNum;
+ m_stkArgOffset = putArgStk->getArgOffset();
+ genPutStructArgStk(putArgStk);
+ m_stkArgVarNum = BAD_VAR_NUM;
return;
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
noway_assert(targetType != TYP_STRUCT);
- assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+ assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet()));
// Get argument offset on stack.
// Here we cross check that argument offset hasn't changed from lowering to codegen since
// we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+ int argOffset = putArgStk->getArgOffset();
#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk);
assert(curArgTabEntry);
assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
#endif
- GenTreePtr data = treeNode->gtGetOp1();
+ GenTreePtr data = putArgStk->gtOp1;
if (data->isContained())
{
@@ -8723,7 +7859,125 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
#endif // !_TARGET_X86_
}
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef _TARGET_X86_
+// genPushReg: Push a register value onto the stack and adjust the stack level
+//
+// Arguments:
+// type - the type of value to be stored
+//    srcReg - the register containing the value
+//
+// Notes:
+// For TYP_LONG, the srcReg must be a floating point register.
+// Otherwise, the register type must be consistent with the given type.
+//
+void CodeGen::genPushReg(var_types type, regNumber srcReg)
+{
+ unsigned size = genTypeSize(type);
+ if (varTypeIsIntegralOrI(type) && type != TYP_LONG)
+ {
+ assert(genIsValidIntReg(srcReg));
+ inst_RV(INS_push, srcReg, type);
+ }
+ else
+ {
+ instruction ins;
+ emitAttr attr = emitTypeSize(type);
+ if (type == TYP_LONG)
+ {
+ // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg.
+ // This is only used when we are pushing a struct from memory to memory, and basically is
+ // handling an 8-byte "chunk", as opposed to strictly a long type.
+ ins = INS_movq;
+ }
+ else
+ {
+ ins = ins_Store(type);
+ }
+ assert(genIsValidFloatReg(srcReg));
+ inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
+ }
+ genStackLevel += size;
+}
+#endif // _TARGET_X86_
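A hedged usage sketch of the non-integer path described in the notes above, assuming an 8-byte chunk of a struct currently sits in REG_XMM0:

    // Emits roughly: sub esp, 8 / movq [esp], xmm0, and bumps genStackLevel by 8.
    genPushReg(TYP_LONG, REG_XMM0);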
+
+#if defined(FEATURE_PUT_STRUCT_ARG_STK)
+// genStoreRegToStackArg: Store a register value into the stack argument area
+//
+// Arguments:
+// type - the type of value to be stored
+//    srcReg - the register containing the value
+// offset - the offset from the base (see Assumptions below)
+//
+// Notes:
+// A type of TYP_STRUCT instructs this method to store a 16-byte chunk
+// at the given offset (i.e. not the full struct).
+//
+// Assumptions:
+// The caller must set the context appropriately before calling this method:
+// - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call.
+// - On x86, the caller must set m_pushStkArg if this method should push the argument.
+// Otherwise, the argument is stored at the given offset from sp.
+//
+// TODO: In the below code the load and store instructions are for 16 bytes, but the
+// type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+// this probably needs to be changed.
+//
+void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset)
+{
+ assert(srcReg != REG_NA);
+ instruction ins;
+ emitAttr attr;
+ unsigned size;
+
+ if (type == TYP_STRUCT)
+ {
+ ins = INS_movdqu;
+ // This should be changed!
+ attr = EA_8BYTE;
+ size = 16;
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(type))
+ {
+ assert(genIsValidFloatReg(srcReg));
+ ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly
+ }
+ else
+#endif // FEATURE_SIMD
+#ifdef _TARGET_X86_
+ if (type == TYP_LONG)
+ {
+ assert(genIsValidFloatReg(srcReg));
+ ins = INS_movq;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ assert((varTypeIsFloating(type) && genIsValidFloatReg(srcReg)) ||
+ (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg)));
+ ins = ins_Store(type);
+ }
+ attr = emitTypeSize(type);
+ size = genTypeSize(type);
+ }
+
+#ifdef _TARGET_X86_
+ if (m_pushStkArg)
+ {
+ genPushReg(type, srcReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset);
+ }
+#else // !_TARGET_X86_
+ assert(m_stkArgVarNum != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset);
+#endif // !_TARGET_X86_
+}
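For the non-x86 path, the caller establishes the context first; the unix-amd64 branch of genPutArgStk above does exactly this. A sketch of the calling pattern (no new functionality):

    // x64: point the helper at the outgoing argument area, then store a slot.
    m_stkArgVarNum = getBaseVarForPutArgStk(putArgStk);
    m_stkArgOffset = putArgStk->getArgOffset();
    genStoreRegToStackArg(TYP_I_IMPL, REG_RCX, 0 /* offset within the argument */);
    m_stkArgVarNum = BAD_VAR_NUM;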
//---------------------------------------------------------------------
// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
@@ -8731,42 +7985,39 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode)
// it generates the gcinfo as well.
//
// Arguments
-// treeNode - the GT_PUTARG_STK node
-// baseVarNum - the variable number relative to which to put the argument on the stack.
-// For tail calls this is the baseVarNum = 0.
-// For non tail calls this is the outgoingArgSpace.
-//
-// Return value:
-// None
+// putArgStk - the GT_PUTARG_STK node
//
-void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
+// Notes:
+// In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number
+// corresponding to the argument area (where we will put the argument on the stack).
+// For tail calls this is the baseVarNum = 0.
+// For non tail calls this is the outgoingArgSpace.
+void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
- assert(baseVarNum != BAD_VAR_NUM);
-
- var_types targetType = treeNode->TypeGet();
+ var_types targetType = putArgStk->TypeGet();
if (varTypeIsSIMD(targetType))
{
- regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
+ regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
- getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
- treeNode->AsPutArgStk()->getArgOffset());
+ genStoreRegToStackArg(targetType, srcReg, 0);
return;
}
assert(targetType == TYP_STRUCT);
- GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
if (putArgStk->gtNumberReferenceSlots == 0)
{
switch (putArgStk->gtPutArgStkKind)
{
- case GenTreePutArgStk::PutArgStkKindRepInstr:
- genStructPutArgRepMovs(putArgStk, baseVarNum);
+ case GenTreePutArgStk::Kind::RepInstr:
+ genStructPutArgRepMovs(putArgStk);
break;
- case GenTreePutArgStk::PutArgStkKindUnroll:
- genStructPutArgUnroll(putArgStk, baseVarNum);
+ case GenTreePutArgStk::Kind::Unroll:
+ genStructPutArgUnroll(putArgStk);
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ genStructPutArgUnroll(putArgStk);
break;
default:
unreached();
@@ -8775,108 +8026,150 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
else
{
// No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always.
+ CLANG_FORMAT_COMMENT_ANCHOR;
- // Consume these registers.
- // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
- genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
- GenTreePtr dstAddr = putArgStk;
- GenTreePtr src = putArgStk->gtOp.gtOp1;
- assert(src->OperGet() == GT_OBJ);
- GenTreePtr srcAddr = src->gtGetOp1();
+#ifdef _TARGET_X86_
+    // On x86, any struct that contains GC references must be stored to the stack using `push` instructions so
+ // that the emitter properly detects the need to update the method's GC information.
+ //
+ // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs
+ // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the
+ // future.
+ assert(m_pushStkArg);
- unsigned slots = putArgStk->gtNumSlots;
+ GenTree* srcAddr = putArgStk->gtGetOp1()->gtGetOp1();
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ const unsigned numSlots = putArgStk->gtNumSlots;
- // We are always on the stack we don't need to use the write barrier.
- BYTE* gcPtrs = putArgStk->gtGcPtrs;
- unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+ regNumber srcRegNum = srcAddr->gtRegNum;
+ const bool srcAddrInReg = srcRegNum != REG_NA;
- unsigned i = 0;
- unsigned copiedSlots = 0;
- while (i < slots)
+ unsigned srcLclNum = 0;
+ unsigned srcLclOffset = 0;
+ if (srcAddrInReg)
{
- switch (gcPtrs[i])
+ genConsumeReg(srcAddr);
+ }
+ else
+ {
+ assert(srcAddr->OperIsLocalAddr());
+
+ srcLclNum = srcAddr->AsLclVarCommon()->gtLclNum;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
{
- case TYPE_GC_NONE:
- // Let's see if we can use rep movsq instead of a sequence of movsq instructions
- // to save cycles and code size.
- {
- unsigned nonGcSlotCount = 0;
+ srcLclOffset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ }
- do
- {
- nonGcSlotCount++;
- i++;
- } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+ for (int i = numSlots - 1; i >= 0; --i)
+ {
+ emitAttr slotAttr;
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ {
+ slotAttr = EA_4BYTE;
+ }
+ else if (gcPtrs[i] == TYPE_GC_REF)
+ {
+ slotAttr = EA_GCREF;
+ }
+ else
+ {
+ assert(gcPtrs[i] == TYPE_GC_BYREF);
+ slotAttr = EA_BYREF;
+ }
- // If we have a very small contiguous non-gc region, it's better just to
- // emit a sequence of movsq instructions
- if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
- {
- copiedSlots += nonGcSlotCount;
- while (nonGcSlotCount > 0)
- {
- instGen(INS_movsq);
- nonGcSlotCount--;
- }
- }
- else
- {
- getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
- copiedSlots += nonGcSlotCount;
- instGen(INS_r_movsq);
- }
- }
- break;
+ const unsigned offset = i * 4;
+ if (srcAddrInReg)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
+ }
+ genStackLevel += 4;
+ }
+#else // !defined(_TARGET_X86_)
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA);
+
+ const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr();
+ const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF;
+
+#if DEBUG
+ unsigned numGCSlotsCopied = 0;
+#endif // DEBUG
+
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ const unsigned numSlots = putArgStk->gtNumSlots;
+ for (unsigned i = 0; i < numSlots;)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ {
+ // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively)
+ // instead of a sequence of movsp instructions to save cycles and code size.
+ unsigned adjacentNonGCSlotCount = 0;
+ do
+ {
+ adjacentNonGCSlotCount++;
+ i++;
+ } while ((i < numSlots) && (gcPtrs[i] == TYPE_GC_NONE));
- case TYPE_GC_REF: // Is an object ref
- case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
+ // If we have a very small contiguous non-ref region, it's better just to
+ // emit a sequence of movsp instructions
+ if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
{
- // We have a GC (byref or ref) pointer
- // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
- // but the logic for emitting a GC info record is not available (it is internal for the emitter
- // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
- // instGen(INS_movsq); and emission of gc info.
-
- var_types memType;
- if (gcPtrs[i] == TYPE_GC_REF)
- {
- memType = TYP_REF;
- }
- else
+ for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--)
{
- assert(gcPtrs[i] == TYPE_GC_BYREF);
- memType = TYP_BYREF;
+ instGen(INS_movsp);
}
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount);
+ instGen(INS_r_movsp);
+ }
+ }
+ else
+ {
+ assert((gcPtrs[i] == TYPE_GC_REF) || (gcPtrs[i] == TYPE_GC_BYREF));
+
+ // We have a GC (byref or ref) pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsp); and emission of gc info.
- getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
- getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
- ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+ var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF;
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE);
+
+#ifdef DEBUG
+ numGCSlotsCopied++;
+#endif // DEBUG
+ i++;
+ if (i < numSlots)
+ {
// Source for the copy operation.
// If a LocalAddr, use EA_PTRSIZE - copy from stack.
// If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
- getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
- TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE);
// Always copying to the stack - outgoing arg area
// (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
- copiedSlots++;
- gcPtrCount--;
- i++;
}
- break;
-
- default:
- unreached();
- break;
}
}
- assert(gcPtrCount == 0);
+ assert(numGCSlotsCopied == putArgStk->gtNumberReferenceSlots);
+#endif // _TARGET_X86_
}
}
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_PUT_STRUCT_ARG_STK)
/*****************************************************************************
*
@@ -9043,7 +8336,7 @@ void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
return infoPtr;
}
-#else // !JIT32_GCENCODER
+#else // !JIT32_GCENCODER
void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
{
IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
@@ -9061,7 +8354,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// Now we can actually use those slot ID's to declare live ranges.
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-#if defined(DEBUGGING_SUPPORT)
if (compiler->opts.compDbgEnC)
{
// what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
@@ -9088,7 +8380,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// frame
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
}
-#endif
gcInfoEncoder->Build();
@@ -9203,18 +8494,33 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode)
assert(varDsc->TypeGet() == TYP_LONG);
assert(!varDsc->lvPromoted);
GenTreePtr op1 = treeNode->gtOp.gtOp1;
- noway_assert(op1->OperGet() == GT_LONG);
+ noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
genConsumeRegs(op1);
- // Definitions of register candidates will have been lowered to 2 int lclVars.
- assert(!treeNode->InReg());
+ if (op1->OperGet() == GT_LONG)
+ {
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA),
+ "Store of long lclVar with contained immediate");
+
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ }
+ else if (op1->OperGet() == GT_MUL_LONG)
+ {
+ assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
- GenTreePtr loVal = op1->gtGetOp1();
- GenTreePtr hiVal = op1->gtGetOp2();
- // NYI: Contained immediates.
- NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
- emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ // Stack store
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum,
+ genTypeSize(TYP_INT));
+ }
}
#endif // !defined(_TARGET_64BIT_)
@@ -9332,57 +8638,6 @@ void CodeGen::genAmd64EmitterUnitTests()
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************
- * genSetScopeInfo
- *
- * Called for every scope info piece to record by the main genSetScopeInfo()
- */
-
-void CodeGen::genSetScopeInfo(unsigned which,
- UNATIVE_OFFSET startOffs,
- UNATIVE_OFFSET length,
- unsigned varNum,
- unsigned LVnum,
- bool avail,
- Compiler::siVarLoc& varLoc)
-{
- /* We need to do some mapping while reporting back these variables */
-
- unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
- noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
-
- VarName name = nullptr;
-
-#ifdef DEBUG
-
- for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
- {
- if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
- {
- name = compiler->info.compVarScopes[scopeNum].vsdName;
- }
- }
-
- // Hang on to this compiler->info.
-
- TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
-
- tlvi.tlviVarNum = ilVarNum;
- tlvi.tlviLVnum = LVnum;
- tlvi.tlviName = name;
- tlvi.tlviStartPC = startOffs;
- tlvi.tlviLength = length;
- tlvi.tlviAvailable = avail;
- tlvi.tlviVarLoc = varLoc;
-
-#endif // DEBUG
-
- compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
-}
-#endif // DEBUGGING_SUPPORT
-
#endif // _TARGET_AMD64_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/compatjit/.gitmirror b/src/jit/compatjit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/compatjit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/compatjit/CMakeLists.txt b/src/jit/compatjit/CMakeLists.txt
new file mode 100644
index 0000000000..1e0615e431
--- /dev/null
+++ b/src/jit/compatjit/CMakeLists.txt
@@ -0,0 +1,66 @@
+project(compatjit)
+
+# This compatjit.dll is only built if we are not building JIT32 as compatjit.dll.
+# It is the same build as legacyjit.dll, just with a different name, and not
+# built as an altjit.
+
+add_definitions(-DLEGACY_BACKEND)
+
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+# No SIMD in legacy back-end.
+remove_definitions(-DFEATURE_SIMD)
+remove_definitions(-DFEATURE_AVX_SUPPORT)
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=compatjit.dll)
+endif(WIN32)
+
+add_library_clr(compatjit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(compatjit jit_exports)
+
+set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(compatjit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(compatjit)
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index afbecdfc60..114847c0d0 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -48,6 +48,60 @@ bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr;
#endif // ALT_JIT
+/*****************************************************************************
+ *
+ * Little helpers to grab the current cycle counter value; this is done
+ * differently based on target architecture, host toolchain, etc. The
+ * main thing is to keep the overhead absolutely minimal; in fact, on
+ * x86/x64 we use RDTSC even though it's not thread-safe; GetThreadCycles
+ * (which is monotonic) is just too expensive.
+ */
+#ifdef FEATURE_JIT_METHOD_PERF
+
+#if defined(_HOST_X86_) || defined(_HOST_AMD64_)
+
+#if defined(_MSC_VER)
+
+#include <intrin.h>
+inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
+{
+ *cycleOut = __rdtsc();
+ return true;
+}
+
+#elif defined(__clang__)
+
+inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
+{
+ uint64_t cycles;
+ asm volatile("rdtsc" : "=A"(cycles));
+ *cycleOut = cycles;
+ return true;
+}
+
+#else // neither _MSC_VER nor __clang__
+
+// The following *might* work - might as well try.
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#endif
+
+#elif defined(_HOST_ARM_) || defined(_HOST_ARM64_)
+
+// If this doesn't work please see ../gc/gc.cpp for additional ARM
+// info (and possible solutions).
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#else // not x86/x64 and not ARM
+
+// Don't know what this target is, but let's give it a try; if
+// someone really wants to make this work, please add the right
+// code here.
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp)
+
+#endif // which host OS
+
+#endif // FEATURE_JIT_METHOD_PERF
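A minimal usage sketch of the helper defined above (the work being timed is hypothetical):

    unsigned __int64 startCycles = 0;
    unsigned __int64 endCycles   = 0;
    if (_our_GetThreadCycles(&startCycles))
    {
        // ... the phase being measured goes here ...
        _our_GetThreadCycles(&endCycles);
        unsigned __int64 elapsedCycles = endCycles - startCycles; // raw RDTSC delta
    }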
/*****************************************************************************/
inline unsigned getCurTime()
{
@@ -147,8 +201,6 @@ void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek)
void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
{
-#ifdef DEBUGGING_SUPPORT
-
static IPmappingDsc* nextMappingDsc;
static unsigned lastLine;
@@ -203,8 +255,6 @@ void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
nextMappingDsc = nextMappingDsc->ipmdNext;
}
}
-
-#endif
}
/*****************************************************************************/
@@ -232,6 +282,15 @@ unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000,
Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets);
#endif // MEASURE_NODE_SIZE
+/*****************************************************************************/
+#if MEASURE_MEM_ALLOC
+
+unsigned memSizeHistBuckets[] = {20, 50, 75, 100, 150, 250, 500, 1000, 5000, 0};
+Histogram memAllocHist(HostAllocator::getHostAllocator(), memSizeHistBuckets);
+Histogram memUsedHist(HostAllocator::getHostAllocator(), memSizeHistBuckets);
+
+#endif // MEASURE_MEM_ALLOC
+
/*****************************************************************************
*
* Variables to keep track of total code amounts.
@@ -475,7 +534,7 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
for (;;)
{
// all of class chain must be of value type and must have only one field
- if (!info.compCompHnd->isValueClass(clsHnd) && info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
+ if (!info.compCompHnd->isValueClass(clsHnd) || info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
{
return false;
}
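The operator change above is a correctness fix: with &&, the early return fired only when both conditions held, so a type that is not a value class but happens to have exactly one field slipped through. Restated with illustrative parameters standing in for the two queries above:

    // isValueClass / numFields stand in for isValueClass(clsHnd) and
    // getClassNumInstanceFields(clsHnd) in the loop above.
    bool RejectNonSingleFieldValueClass(bool isValueClass, unsigned numFields)
    {
        return !isValueClass || (numFields != 1); // the old '&&' form missed a
                                                  // non-value class with one field
    }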
@@ -1101,14 +1160,11 @@ size_t genFlowNodeCnt;
#ifdef DEBUG
/* static */
unsigned Compiler::s_compMethodsCount = 0; // to produce unique label names
-
-/* static */
-bool Compiler::s_dspMemStats = false;
#endif
-#ifndef DEBUGGING_SUPPORT
+#if MEASURE_MEM_ALLOC
/* static */
-const bool Compiler::Options::compDbgCode = false;
+bool Compiler::s_dspMemStats = false;
#endif
#ifndef PROFILING_SUPPORTED
@@ -1184,18 +1240,22 @@ void Compiler::compShutdown()
}
#endif
+#if NODEBASH_STATS
+ GenTree::ReportOperBashing(jitstdout);
+#endif
+
// Where should we write our statistics output?
FILE* fout = jitstdout;
#ifdef FEATURE_JIT_METHOD_PERF
- if (compJitTimeLogFilename != NULL)
+ if (compJitTimeLogFilename != nullptr)
{
- // I assume that this will return NULL if it fails for some reason, and
- // that...
FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
- // ...Print will return silently with a NULL argument.
- CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
- fclose(jitTimeLogFile);
+ if (jitTimeLogFile != nullptr)
+ {
+ CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
+ fclose(jitTimeLogFile);
+ }
}
#endif // FEATURE_JIT_METHOD_PERF
@@ -1214,6 +1274,63 @@ void Compiler::compShutdown()
}
#endif // COUNT_RANGECHECKS
+#if COUNT_AST_OPERS
+
+ // Add up all the counts so that we can show percentages of total
+ unsigned gtc = 0;
+ for (unsigned op = 0; op < GT_COUNT; op++)
+ gtc += GenTree::s_gtNodeCounts[op];
+
+ if (gtc > 0)
+ {
+ unsigned rem_total = gtc;
+ unsigned rem_large = 0;
+ unsigned rem_small = 0;
+
+ unsigned tot_large = 0;
+ unsigned tot_small = 0;
+
+ fprintf(fout, "\nGenTree operator counts (approximate):\n\n");
+
+ for (unsigned op = 0; op < GT_COUNT; op++)
+ {
+ unsigned siz = GenTree::s_gtTrueSizes[op];
+ unsigned cnt = GenTree::s_gtNodeCounts[op];
+ double pct = 100.0 * cnt / gtc;
+
+ if (siz > TREE_NODE_SZ_SMALL)
+ tot_large += cnt;
+ else
+ tot_small += cnt;
+
+ // Let's not show anything below a threshold
+ if (pct >= 0.5)
+ {
+ fprintf(fout, " GT_%-17s %7u (%4.1lf%%) %3u bytes each\n", GenTree::OpName((genTreeOps)op), cnt,
+ pct, siz);
+ rem_total -= cnt;
+ }
+ else
+ {
+ if (siz > TREE_NODE_SZ_SMALL)
+ rem_large += cnt;
+ else
+ rem_small += cnt;
+ }
+ }
+ if (rem_total > 0)
+ {
+ fprintf(fout, " All other GT_xxx ... %7u (%4.1lf%%) ... %4.1lf%% small + %4.1lf%% large\n", rem_total,
+ 100.0 * rem_total / gtc, 100.0 * rem_small / gtc, 100.0 * rem_large / gtc);
+ }
+ fprintf(fout, " -----------------------------------------------------\n");
+ fprintf(fout, " Total ....... %11u --ALL-- ... %4.1lf%% small + %4.1lf%% large\n", gtc,
+ 100.0 * tot_small / gtc, 100.0 * tot_large / gtc);
+ fprintf(fout, "\n");
+ }
+
+#endif // COUNT_AST_OPERS
+
#if DISPLAY_SIZES
if (grossVMsize && grossNCsize)
@@ -1367,17 +1484,23 @@ void Compiler::compShutdown()
#if MEASURE_MEM_ALLOC
-#ifdef DEBUG
- // Under debug, we only dump memory stats when the COMPlus_* variable is defined.
- // Under non-debug, we don't have the COMPlus_* variable, and we always dump it.
if (s_dspMemStats)
-#endif
{
fprintf(fout, "\nAll allocations:\n");
s_aggMemStats.Print(jitstdout);
fprintf(fout, "\nLargest method:\n");
s_maxCompMemStats.Print(jitstdout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of total memory allocated per method (in KB):\n");
+ memAllocHist.dump(fout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of total memory used per method (in KB):\n");
+ memUsedHist.dump(fout);
}
#endif // MEASURE_MEM_ALLOC
@@ -1452,100 +1575,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
{
#if MEASURE_NODE_SIZE
- /*
- IMPORTANT: Use the following code to check the alignment of
- GenTree members (in a retail build, of course).
- */
-
- GenTree* gtDummy = nullptr;
-
- fprintf(fout, "\n");
- fprintf(fout, "Offset / size of gtOper = %2u / %2u\n", offsetof(GenTree, gtOper), sizeof(gtDummy->gtOper));
- fprintf(fout, "Offset / size of gtType = %2u / %2u\n", offsetof(GenTree, gtType), sizeof(gtDummy->gtType));
-#if FEATURE_ANYCSE
- fprintf(fout, "Offset / size of gtCSEnum = %2u / %2u\n", offsetof(GenTree, gtCSEnum),
- sizeof(gtDummy->gtCSEnum));
-#endif // FEATURE_ANYCSE
-#if ASSERTION_PROP
- fprintf(fout, "Offset / size of gtAssertionNum = %2u / %2u\n", offsetof(GenTree, gtAssertionNum),
- sizeof(gtDummy->gtAssertionNum));
-#endif // ASSERTION_PROP
-#if FEATURE_STACK_FP_X87
- fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl),
- sizeof(gtDummy->gtFPlvl));
-#endif // FEATURE_STACK_FP_X87
- // TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree
- // class (see https://github.com/dotnet/coreclr/pull/493)
- // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ),
- // sizeof(gtDummy->_gtCostEx ));
- // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ),
- // sizeof(gtDummy->_gtCostSz ));
- fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags),
- sizeof(gtDummy->gtFlags));
- fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair),
- sizeof(gtDummy->gtVNPair));
- fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs),
- sizeof(gtDummy->gtRsvdRegs));
-#ifdef LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtUsedRegs = %2u / %2u\n", offsetof(GenTree, gtUsedRegs),
- sizeof(gtDummy->gtUsedRegs));
-#endif // LEGACY_BACKEND
-#ifndef LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtLsraInfo = %2u / %2u\n", offsetof(GenTree, gtLsraInfo),
- sizeof(gtDummy->gtLsraInfo));
-#endif // !LEGACY_BACKEND
- fprintf(fout, "Offset / size of gtNext = %2u / %2u\n", offsetof(GenTree, gtNext), sizeof(gtDummy->gtNext));
- fprintf(fout, "Offset / size of gtPrev = %2u / %2u\n", offsetof(GenTree, gtPrev), sizeof(gtDummy->gtPrev));
- fprintf(fout, "\n");
-
-#if SMALL_TREE_NODES
- fprintf(fout, "Small tree node size = %3u\n", TREE_NODE_SZ_SMALL);
-#endif // SMALL_TREE_NODES
- fprintf(fout, "Large tree node size = %3u\n", TREE_NODE_SZ_LARGE);
- fprintf(fout, "Size of GenTree = %3u\n", sizeof(GenTree));
- fprintf(fout, "Size of GenTreeUnOp = %3u\n", sizeof(GenTreeUnOp));
- fprintf(fout, "Size of GenTreeOp = %3u\n", sizeof(GenTreeOp));
- fprintf(fout, "Size of GenTreeVal = %3u\n", sizeof(GenTreeVal));
- fprintf(fout, "Size of GenTreeIntConCommon = %3u\n", sizeof(GenTreeIntConCommon));
- fprintf(fout, "Size of GenTreePhysReg = %3u\n", sizeof(GenTreePhysReg));
-#ifndef LEGACY_BACKEND
- fprintf(fout, "Size of GenTreeJumpTable = %3u\n", sizeof(GenTreeJumpTable));
-#endif // !LEGACY_BACKEND
- fprintf(fout, "Size of GenTreeIntCon = %3u\n", sizeof(GenTreeIntCon));
- fprintf(fout, "Size of GenTreeLngCon = %3u\n", sizeof(GenTreeLngCon));
- fprintf(fout, "Size of GenTreeDblCon = %3u\n", sizeof(GenTreeDblCon));
- fprintf(fout, "Size of GenTreeStrCon = %3u\n", sizeof(GenTreeStrCon));
- fprintf(fout, "Size of GenTreeLclVarCommon = %3u\n", sizeof(GenTreeLclVarCommon));
- fprintf(fout, "Size of GenTreeLclVar = %3u\n", sizeof(GenTreeLclVar));
- fprintf(fout, "Size of GenTreeLclFld = %3u\n", sizeof(GenTreeLclFld));
- fprintf(fout, "Size of GenTreeRegVar = %3u\n", sizeof(GenTreeRegVar));
- fprintf(fout, "Size of GenTreeCast = %3u\n", sizeof(GenTreeCast));
- fprintf(fout, "Size of GenTreeBox = %3u\n", sizeof(GenTreeBox));
- fprintf(fout, "Size of GenTreeField = %3u\n", sizeof(GenTreeField));
- fprintf(fout, "Size of GenTreeArgList = %3u\n", sizeof(GenTreeArgList));
- fprintf(fout, "Size of GenTreeColon = %3u\n", sizeof(GenTreeColon));
- fprintf(fout, "Size of GenTreeCall = %3u\n", sizeof(GenTreeCall));
- fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg));
- fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal));
- fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark));
- fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic));
- fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex));
- fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen));
- fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk));
- fprintf(fout, "Size of GenTreeArrElem = %3u\n", sizeof(GenTreeArrElem));
- fprintf(fout, "Size of GenTreeAddrMode = %3u\n", sizeof(GenTreeAddrMode));
- fprintf(fout, "Size of GenTreeIndir = %3u\n", sizeof(GenTreeIndir));
- fprintf(fout, "Size of GenTreeStoreInd = %3u\n", sizeof(GenTreeStoreInd));
- fprintf(fout, "Size of GenTreeRetExpr = %3u\n", sizeof(GenTreeRetExpr));
- fprintf(fout, "Size of GenTreeStmt = %3u\n", sizeof(GenTreeStmt));
- fprintf(fout, "Size of GenTreeObj = %3u\n", sizeof(GenTreeObj));
- fprintf(fout, "Size of GenTreeClsVar = %3u\n", sizeof(GenTreeClsVar));
- fprintf(fout, "Size of GenTreeArgPlace = %3u\n", sizeof(GenTreeArgPlace));
- fprintf(fout, "Size of GenTreeLabel = %3u\n", sizeof(GenTreeLabel));
- fprintf(fout, "Size of GenTreePhiArg = %3u\n", sizeof(GenTreePhiArg));
- fprintf(fout, "Size of GenTreePutArgStk = %3u\n", sizeof(GenTreePutArgStk));
- fprintf(fout, "\n");
-#endif // MEASURE_NODE_SIZE
+ GenTree::DumpNodeSizes(fout);
+#endif
#if MEASURE_BLOCK_SIZE
@@ -1572,8 +1603,6 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbJumpDest));
fprintf(fout, "Offset / size of bbJumpSwt = %3u / %3u\n", offsetof(BasicBlock, bbJumpSwt),
sizeof(bbDummy->bbJumpSwt));
- fprintf(fout, "Offset / size of bbTreeList = %3u / %3u\n", offsetof(BasicBlock, bbTreeList),
- sizeof(bbDummy->bbTreeList));
fprintf(fout, "Offset / size of bbEntryState = %3u / %3u\n", offsetof(BasicBlock, bbEntryState),
sizeof(bbDummy->bbEntryState));
fprintf(fout, "Offset / size of bbStkTempsIn = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsIn),
@@ -1618,12 +1647,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbHeapSsaNumIn));
fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
sizeof(bbDummy->bbHeapSsaNumOut));
-
-#ifdef DEBUGGING_SUPPORT
fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
sizeof(bbDummy->bbScope));
-#endif // DEBUGGING_SUPPORT
-
fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
sizeof(bbDummy->bbCseGen));
fprintf(fout, "Offset / size of bbCseIn = %3u / %3u\n", offsetof(BasicBlock, bbCseIn),
@@ -1888,10 +1913,6 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
SIMDVectorHandle = nullptr;
#endif
-#ifdef DEBUG
- inlRNG = nullptr;
-#endif
-
compUsesThrowHelper = false;
}
@@ -2244,14 +2265,14 @@ const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
void Compiler::compSetProcessor()
{
- unsigned compileFlags = opts.eeFlags;
+ const JitFlags& jitFlags = *opts.jitFlags;
#if defined(_TARGET_ARM_)
info.genCPU = CPU_ARM;
#elif defined(_TARGET_AMD64_)
- info.genCPU = CPU_X64;
+ info.genCPU = CPU_X64;
#elif defined(_TARGET_X86_)
- if (compileFlags & CORJIT_FLG_TARGET_P4)
+ if (jitFlags.IsSet(JitFlags::JIT_FLAG_TARGET_P4))
info.genCPU = CPU_X86_PENTIUM_4;
else
info.genCPU = CPU_X86;
@@ -2262,33 +2283,66 @@ void Compiler::compSetProcessor()
//
CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_AMD64_
- opts.compUseFCOMI = false;
- opts.compUseCMOV = true;
- opts.compCanUseSSE2 = true;
+#ifdef _TARGET_XARCH_
+ opts.compCanUseSSE3_4 = false;
+ if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE3_4))
+ {
+ if (JitConfig.EnableSSE3_4() != 0)
+ {
+ opts.compCanUseSSE3_4 = true;
+ }
+ }
#ifdef FEATURE_AVX_SUPPORT
// COMPlus_EnableAVX can be used to disable using AVX if available on a target machine.
// Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit
opts.compCanUseAVX = false;
- if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
if (JitConfig.EnableAVX() != 0)
{
opts.compCanUseAVX = true;
- if (!compIsForInlining())
- {
- codeGen->getEmitter()->SetUseAVX(true);
- }
}
}
-#endif
-#endif //_TARGET_AMD64_
+#endif // FEATURE_AVX_SUPPORT
-#ifdef _TARGET_X86_
- opts.compUseFCOMI = ((opts.eeFlags & CORJIT_FLG_USE_FCOMI) != 0);
- opts.compUseCMOV = ((opts.eeFlags & CORJIT_FLG_USE_CMOV) != 0);
- opts.compCanUseSSE2 = ((opts.eeFlags & CORJIT_FLG_USE_SSE2) != 0);
+ if (!compIsForInlining())
+ {
+#ifdef FEATURE_AVX_SUPPORT
+ if (opts.compCanUseAVX)
+ {
+ codeGen->getEmitter()->SetUseAVX(true);
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ if (opts.compCanUseSSE3_4)
+ {
+ codeGen->getEmitter()->SetUseSSE3_4(true);
+ }
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_AMD64_
+ opts.compUseFCOMI = false;
+ opts.compUseCMOV = true;
+ opts.compCanUseSSE2 = true;
+#elif defined(_TARGET_X86_)
+ opts.compUseFCOMI = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_FCOMI);
+ opts.compUseCMOV = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_CMOV);
+ opts.compCanUseSSE2 = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE2);
+
+#if !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR)
+ // RyuJIT/x86 requires SSE2 to be available: there is no support for generating floating-point
+ // code with x87 instructions. On .NET Core, the VM always tells us that SSE2 is available.
+    // However, on desktop, under ngen (and presumably in the unlikely case you're actually
+ // running on a machine without SSE2), the VM does not set the SSE2 flag. We ignore this and
+ // go ahead and generate SSE2 code anyway.
+ if (!opts.compCanUseSSE2)
+ {
+ JITDUMP("VM didn't set CORJIT_FLG_USE_SSE2! Ignoring, and generating SSE2 code anyway.\n");
+ opts.compCanUseSSE2 = true;
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR)
#ifdef DEBUG
if (opts.compUseFCOMI)
@@ -2296,7 +2350,9 @@ void Compiler::compSetProcessor()
if (opts.compUseCMOV)
opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
- // Should we override the SSE2 setting
+#ifdef LEGACY_BACKEND
+
+ // Should we override the SSE2 setting?
enum
{
SSE2_FORCE_DISABLE = 0,
@@ -2310,7 +2366,17 @@ void Compiler::compSetProcessor()
opts.compCanUseSSE2 = true;
else if (opts.compCanUseSSE2)
opts.compCanUseSSE2 = !compStressCompile(STRESS_GENERIC_VARN, 50);
+
+#else // !LEGACY_BACKEND
+
+ // RyuJIT/x86 requires SSE2 to be available and hence
+ // don't turn off compCanUseSSE2 under stress.
+ assert(opts.compCanUseSSE2);
+
+#endif // !LEGACY_BACKEND
+
#endif // DEBUG
+
#endif // _TARGET_X86_
}
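
The instruction-set selection above follows a simple priority: SSE3/SSE4 is only considered when not prejitting, the EE passed JIT_FLAG_USE_SSE3_4, and the EnableSSE3_4 config value is nonzero; AVX additionally requires FEATURE_AVX_SUPPORT, JIT_FLAG_USE_AVX2, and EnableAVX, and when both are usable the emitter is put into AVX mode rather than SSE3/SSE4 mode. A standalone restatement of that decision order, with plain booleans standing in for the JitFlags/JitConfig checks (names here are illustrative, not JIT types):

    // Sketch of the ISA-selection priority in compSetProcessor (xarch only).
    enum class IsaLevel { SSE2, SSE3_4, AVX };

    IsaLevel selectIsa(bool prejit, bool eeAllowsSse34, bool eeAllowsAvx2,
                       bool cfgEnableSse34, bool cfgEnableAvx)
    {
        bool canUseSse34 = !prejit && eeAllowsSse34 && cfgEnableSse34;
        bool canUseAvx   = !prejit && eeAllowsAvx2 && cfgEnableAvx;

        // AVX takes precedence; otherwise fall back to SSE3/SSE4, then baseline SSE2.
        if (canUseAvx)
        {
            return IsaLevel::AVX;
        }
        if (canUseSse34)
        {
            return IsaLevel::SSE3_4;
        }
        return IsaLevel::SSE2;
    }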
@@ -2378,31 +2444,36 @@ unsigned ReinterpretHexAsDecimal(unsigned in)
return result;
}
-void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
+void Compiler::compInitOptions(JitFlags* jitFlags)
{
#ifdef UNIX_AMD64_ABI
opts.compNeedToAlignFrame = false;
#endif // UNIX_AMD64_ABI
memset(&opts, 0, sizeof(opts));
- unsigned compileFlags = jitFlags->corJitFlags;
-
if (compIsForInlining())
{
- assert((compileFlags & CORJIT_FLG_LOST_WHEN_INLINING) == 0);
- assert(compileFlags & CORJIT_FLG_SKIP_VERIFICATION);
+ // The following flags are lost when inlining. (They are removed in
+ // Compiler::fgInvokeInlineeCompiler().)
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO));
+
+ assert(jitFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION));
}
opts.jitFlags = jitFlags;
- opts.eeFlags = compileFlags;
opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
- if (opts.eeFlags & (CORJIT_FLG_DEBUG_CODE | CORJIT_FLG_MIN_OPT))
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT))
{
opts.compFlags = CLFLG_MINOPT;
}
// Don't optimize .cctors (except prejit) or if we're an inlinee
- else if (!(opts.eeFlags & CORJIT_FLG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && !compIsForInlining())
+ else if (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) &&
+ !compIsForInlining())
{
opts.compFlags = CLFLG_MINOPT;
}
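
A recurring change in this hunk and the ones below is the switch from testing opts.eeFlags against CORJIT_FLG_* bit masks to querying a JitFlags object through IsSet, Set, and Clear. The JitFlags class itself is not shown in this diff; purely as an illustration of the kind of wrapper being adopted (the real class will differ in flag names and representation), a minimal flag-set type could look like:

    // Minimal sketch of a JitFlags-style wrapper; names and representation
    // are illustrative, not the real JitFlags.
    #include <cassert>
    #include <cstdint>

    class FlagSet
    {
        uint64_t m_flags = 0;

    public:
        void Set(unsigned flag)
        {
            assert(flag < 64);
            m_flags |= (uint64_t(1) << flag);
        }

        void Clear(unsigned flag)
        {
            assert(flag < 64);
            m_flags &= ~(uint64_t(1) << flag);
        }

        bool IsSet(unsigned flag) const
        {
            assert(flag < 64);
            return (m_flags & (uint64_t(1) << flag)) != 0;
        }
    };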
@@ -2414,32 +2485,31 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
// If the EE sets SIZE_OPT or if we are compiling a Class constructor
// we will optimize for code size at the expense of speed
//
- if ((opts.eeFlags & CORJIT_FLG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
{
opts.compCodeOpt = SMALL_CODE;
}
//
// If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
//
- else if (opts.eeFlags & CORJIT_FLG_SPEED_OPT)
+ else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT))
{
opts.compCodeOpt = FAST_CODE;
- assert((opts.eeFlags & CORJIT_FLG_SIZE_OPT) == 0);
+ assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT));
}
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
+
+ opts.compDbgCode = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE);
+ opts.compDbgInfo = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO);
+ opts.compDbgEnC = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC);
-#ifdef DEBUGGING_SUPPORT
- opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
- opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
- opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
#if REGEN_SHORTCUTS || REGEN_CALLPAT
// We never want to have debugging enabled when regenerating GC encoding patterns
opts.compDbgCode = false;
opts.compDbgInfo = false;
opts.compDbgEnC = false;
#endif
-#endif
compSetProcessor();
@@ -2473,7 +2543,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#ifdef DEBUG
const JitConfigValues::MethodSet* pfAltJit;
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
pfAltJit = &JitConfig.AltJitNgen();
}
@@ -2498,7 +2568,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#else // !DEBUG
const char* altJitVal;
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
altJitVal = JitConfig.AltJitNgen().list();
}
@@ -2602,7 +2672,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//
if (!compIsForInlining())
{
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
{
@@ -2952,10 +3022,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif // DEBUG
#ifdef FEATURE_SIMD
-#ifdef _TARGET_AMD64_
- // Minimum bar for availing SIMD benefits is SSE2 on AMD64.
- featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0);
-#endif // _TARGET_AMD64_
+ // Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
+ featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
#endif // FEATURE_SIMD
if (compIsForInlining() || compIsForImportOnly())
@@ -2978,23 +3046,26 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.compTailCallLoopOpt = true;
#endif
-#ifdef DEBUG
- opts.dspInstrs = false;
- opts.dspEmit = false;
- opts.dspLines = false;
- opts.varNames = false;
- opts.dmpHex = false;
- opts.disAsm = false;
- opts.disAsmSpilled = false;
- opts.disDiffable = false;
- opts.dspCode = false;
- opts.dspEHTable = false;
- opts.dspGCtbls = false;
- opts.disAsm2 = false;
- opts.dspUnwind = false;
- s_dspMemStats = false;
- opts.compLongAddress = false;
+#ifdef PROFILING_SUPPORTED
opts.compJitELTHookEnabled = false;
+#endif // PROFILING_SUPPORTED
+
+#ifdef DEBUG
+ opts.dspInstrs = false;
+ opts.dspEmit = false;
+ opts.dspLines = false;
+ opts.varNames = false;
+ opts.dmpHex = false;
+ opts.disAsm = false;
+ opts.disAsmSpilled = false;
+ opts.disDiffable = false;
+ opts.dspCode = false;
+ opts.dspEHTable = false;
+ opts.dspGCtbls = false;
+ opts.disAsm2 = false;
+ opts.dspUnwind = false;
+ opts.compLongAddress = false;
+ opts.optRepeat = false;
#ifdef LATE_DISASM
opts.doLateDisasm = false;
@@ -3007,7 +3078,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//
if (!altJitConfig || opts.altJit)
{
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if ((JitConfig.NgenOrder() & 1) == 1)
{
@@ -3084,14 +3155,14 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.dspDiffable = true;
}
- if (JitConfig.DisplayMemStats() != 0)
+ if (JitConfig.JitLongAddress() != 0)
{
- s_dspMemStats = true;
+ opts.compLongAddress = true;
}
- if (JitConfig.JitLongAddress() != 0)
+ if (JitConfig.JitOptRepeat().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
{
- opts.compLongAddress = true;
+ opts.optRepeat = true;
}
}
@@ -3152,7 +3223,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//-------------------------------------------------------------------------
-#ifdef DEBUGGING_SUPPORT
#ifdef DEBUG
assert(!codeGen->isGCTypeFixed());
opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
@@ -3173,11 +3243,15 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
#endif
+#if MEASURE_MEM_ALLOC
+ s_dspMemStats = (JitConfig.DisplayMemStats() != 0);
+#endif
+
#ifdef PROFILING_SUPPORTED
- opts.compNoPInvokeInlineCB = (opts.eeFlags & CORJIT_FLG_PROF_NO_PINVOKE_INLINE) ? true : false;
+ opts.compNoPInvokeInlineCB = jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_NO_PINVOKE_INLINE);
// Cache the profiler handle
- if (opts.eeFlags & CORJIT_FLG_PROF_ENTERLEAVE)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE))
{
BOOL hookNeeded;
BOOL indirected;
@@ -3192,11 +3266,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
compProfilerMethHndIndirected = false;
}
-#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
- // Right now this ELT hook option is enabled only for arm and amd64
-
- // Honour complus_JitELTHookEnabled only if VM has not asked us to generate profiler
- // hooks in the first place. That is, Override VM only if it hasn't asked for a
+ // Honour COMPlus_JitELTHookEnabled only if VM has not asked us to generate profiler
+ // hooks in the first place. That is, override VM only if it hasn't asked for a
// profiler callback for this method.
if (!compProfilerHookNeeded && (JitConfig.JitELTHookEnabled() != 0))
{
@@ -3209,7 +3280,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
compProfilerMethHnd = (void*)DummyProfilerELTStub;
compProfilerMethHndIndirected = false;
}
-#endif // _TARGET_ARM_ || _TARGET_AMD64_
#endif // PROFILING_SUPPORTED
@@ -3226,10 +3296,9 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
}
#endif
- opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false;
+ opts.compMustInlinePInvokeCalli = jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB);
opts.compScopeInfo = opts.compDbgInfo;
-#endif // DEBUGGING_SUPPORT
#ifdef LATE_DISASM
codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
@@ -3239,7 +3308,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
//-------------------------------------------------------------------------
#if RELOC_SUPPORT
- opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false;
+ opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
#endif
#ifdef DEBUG
@@ -3249,7 +3318,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif
#endif // DEBUG
- opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? true : false;
+ opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT);
#ifdef _TARGET_ARM64_
// TODO-ARM64-NYI: enable hot/cold splitting
@@ -3294,7 +3363,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
fgProfileBuffer = nullptr;
fgProfileData_ILSizeMismatch = false;
fgNumProfileRuns = 0;
- if (opts.eeFlags & CORJIT_FLG_BBOPT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
{
assert(!compIsForInlining());
HRESULT hr;
@@ -3365,7 +3434,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
- if ((opts.eeFlags & CORJIT_FLG_BBOPT) && fgHaveProfileData())
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
{
printf("OPTIONS: using real profile data\n");
}
@@ -3375,7 +3444,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
printf("OPTIONS: discarded IBC profile data due to mismatch in ILSize\n");
}
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
printf("OPTIONS: Jit invoked for ngen\n");
}
@@ -3384,11 +3453,11 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
#endif
opts.compGCPollType = GCPOLL_NONE;
- if (opts.eeFlags & CORJIT_FLG_GCPOLL_CALLS)
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_CALLS))
{
opts.compGCPollType = GCPOLL_CALL;
}
- else if (opts.eeFlags & CORJIT_FLG_GCPOLL_INLINE)
+ else if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_INLINE))
{
// make sure that the EE didn't set both flags.
assert(opts.compGCPollType == GCPOLL_NONE);
@@ -3568,14 +3637,11 @@ void Compiler::compInitDebuggingInfo()
info.compVarScopesCount = 0;
-#ifdef DEBUGGING_SUPPORT
if (opts.compScopeInfo)
-#endif
{
eeGetVars();
}
-#ifdef DEBUGGING_SUPPORT
compInitVarScopeMap();
if (opts.compScopeInfo || opts.compDbgCode)
@@ -3598,7 +3664,6 @@ void Compiler::compInitDebuggingInfo()
JITDUMP("Debuggable code - Add new BB%02u to perform initialization of variables [%08X]\n", fgFirstBB->bbNum,
dspPtr(fgFirstBB));
}
-#endif // DEBUGGING_SUPPORT
/*-------------------------------------------------------------------------
*
@@ -3617,9 +3682,7 @@ void Compiler::compInitDebuggingInfo()
info.compStmtOffsetsCount = 0;
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgInfo)
-#endif
{
/* Get hold of the line# records, if there are any */
@@ -3661,12 +3724,9 @@ void Compiler::compInitDebuggingInfo()
void Compiler::compSetOptimizationLevel()
{
- unsigned compileFlags;
bool theMinOptsValue;
unsigned jitMinOpts;
- compileFlags = opts.eeFlags;
-
if (compIsForInlining())
{
theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
@@ -3757,13 +3817,40 @@ void Compiler::compSetOptimizationLevel()
}
}
+#if 0
+ // The code in this #if can be used to debug optimization issues according to method hash.
+ // To use, uncomment, rebuild and set environment variables minoptshashlo and minoptshashhi.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("minoptshashlo");
+ unsigned methHashLo = 0;
+ if (lostr != nullptr)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ char* histr = getenv("minoptshashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != nullptr)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ if (methHash >= methHashLo && methHash <= methHashHi)
+ {
+ printf("MinOpts for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ theMinOptsValue = true;
+ }
+ }
+ }
+#endif
+#endif
+
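
The disabled block above selects methods for MinOpts by comparing the method hash against a window read from the minoptshashlo and minoptshashhi environment variables; note that, as written, the range check only runs when both variables are set, since the high-bound check is nested inside the low-bound one. As a self-contained illustration of the same hash-window test (methodHash stands in for info.compMethodHash(); this is not JIT code):

    #include <cstdio>
    #include <cstdlib>
    #include <climits>

    // Returns true when methodHash falls inside the [minoptshashlo, minoptshashhi]
    // window; unset variables default to 0 and UINT_MAX respectively.
    bool hashInWindow(unsigned methodHash)
    {
        unsigned lo = 0;
        unsigned hi = UINT_MAX;
        if (const char* lostr = getenv("minoptshashlo"))
        {
            sscanf(lostr, "%x", &lo);
        }
        if (const char* histr = getenv("minoptshashhi"))
        {
            sscanf(histr, "%x", &hi);
        }
        return (methodHash >= lo) && (methodHash <= hi);
    }

For example, setting minoptshashlo=10000000 and minoptshashhi=1fffffff (hex, no 0x prefix, matching the %x parse above) would force MinOpts only for methods whose hash falls in that range.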
if (compStressCompile(STRESS_MIN_OPTS, 5))
{
theMinOptsValue = true;
}
// For PREJIT we never drop down to MinOpts
    // unless CLFLG_MINOPT is set
- else if (!(compileFlags & CORJIT_FLG_PREJIT))
+ else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
{
@@ -3805,7 +3892,7 @@ void Compiler::compSetOptimizationLevel()
// Retail check if we should force Minopts due to the complexity of the method
// For PREJIT we never drop down to MinOpts
    // unless CLFLG_MINOPT is set
- if (!theMinOptsValue && !(compileFlags & CORJIT_FLG_PREJIT) &&
+ if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
(DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
(DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
@@ -3828,14 +3915,14 @@ void Compiler::compSetOptimizationLevel()
unsigned methHash = info.compMethodHash();
char* lostr = getenv("opthashlo");
unsigned methHashLo = 0;
- if (lostr != NULL)
+ if (lostr != NULL)
{
sscanf_s(lostr, "%x", &methHashLo);
// methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
}
char* histr = getenv("opthashhi");
unsigned methHashHi = UINT32_MAX;
- if (histr != NULL)
+ if (histr != NULL)
{
sscanf_s(histr, "%x", &methHashHi);
// methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
@@ -3883,27 +3970,27 @@ _SetMinOpts:
}
#if !defined(_TARGET_AMD64_)
- // The VM sets CORJIT_FLG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
+ // The VM sets JitFlags::JIT_FLAG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
// (2) the function is marked "noinline". The reason for #2 is that people mark functions
    // noinline to ensure they show up in a stack walk. But for AMD64, we don't need a frame
    // pointer for the frame to show up in a stack walk.
- if (compileFlags & CORJIT_FLG_FRAMED)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_FRAMED))
codeGen->setFrameRequired(true);
#endif
- if (compileFlags & CORJIT_FLG_RELOC)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC))
{
codeGen->genAlignLoops = false; // loop alignment not supported for prejitted code
- // The zapper doesn't set CORJIT_FLG_ALIGN_LOOPS, and there is
+ // The zapper doesn't set JitFlags::JIT_FLAG_ALIGN_LOOPS, and there is
// no reason for it to set it as the JIT doesn't currently support loop alignment
// for prejitted images. (The JIT doesn't know the final address of the code, hence
// it can't align code based on unknown addresses.)
- assert((compileFlags & CORJIT_FLG_ALIGN_LOOPS) == 0);
+ assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS));
}
else
{
- codeGen->genAlignLoops = (compileFlags & CORJIT_FLG_ALIGN_LOOPS) != 0;
+ codeGen->genAlignLoops = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS);
}
}
@@ -4075,7 +4162,7 @@ void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, b
// For an overview of the structure of the JIT, see:
// https://github.com/dotnet/coreclr/blob/master/Documentation/botr/ryujit-overview.md
//
-void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags)
+void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags)
{
if (compIsForInlining())
{
@@ -4112,26 +4199,36 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
fgRemovePreds();
}
+ EndPhase(PHASE_IMPORTATION);
+
if (compIsForInlining())
{
/* Quit inlining if fgImport() failed for any reason. */
- if (compDonotInline())
+ if (!compDonotInline())
{
- return;
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
}
- /* Filter out unimported BBs */
+ EndPhase(PHASE_POST_IMPORT);
- fgRemoveEmptyBlocks();
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (pCompJitTimer != nullptr)
+ {
+#if MEASURE_CLRAPI_CALLS
+ EndPhase(PHASE_CLR_API);
+#endif
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false);
+ }
+#endif
return;
}
assert(!compDonotInline());
- EndPhase(PHASE_IMPORTATION);
-
// Maybe the caller was not interested in generating code
if (compIsForImportOnly())
{
@@ -4145,7 +4242,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
fgRemoveEH();
#endif // !FEATURE_EH
- if (compileFlags->corJitFlags & CORJIT_FLG_BBINSTR)
+ if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
{
fgInstrumentMethod();
}
@@ -4180,7 +4277,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
/* Massage the trees so that we can generate code out of them */
fgMorph();
- EndPhase(PHASE_MORPH);
+ EndPhase(PHASE_MORPH_END);
/* GS security checks for unsafe buffers */
if (getNeedsGSSecurityCookie())
@@ -4336,6 +4433,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
bool doCopyProp = true;
bool doAssertionProp = true;
bool doRangeAnalysis = true;
+ int iterations = 1;
#ifdef DEBUG
doSsa = (JitConfig.JitDoSsa() != 0);
@@ -4345,72 +4443,88 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
-#endif
- if (doSsa)
+ if (opts.optRepeat)
{
- fgSsaBuild();
- EndPhase(PHASE_BUILD_SSA);
+ iterations = JitConfig.JitOptRepeatCount();
}
+#endif
- if (doEarlyProp)
+ while (iterations > 0)
{
- /* Propagate array length and rewrite getType() method call */
- optEarlyProp();
- EndPhase(PHASE_EARLY_PROP);
- }
+ if (doSsa)
+ {
+ fgSsaBuild();
+ EndPhase(PHASE_BUILD_SSA);
+ }
- if (doValueNum)
- {
- fgValueNumber();
- EndPhase(PHASE_VALUE_NUMBER);
- }
+ if (doEarlyProp)
+ {
+ /* Propagate array length and rewrite getType() method call */
+ optEarlyProp();
+ EndPhase(PHASE_EARLY_PROP);
+ }
- if (doLoopHoisting)
- {
- /* Hoist invariant code out of loops */
- optHoistLoopCode();
- EndPhase(PHASE_HOIST_LOOP_CODE);
- }
+ if (doValueNum)
+ {
+ fgValueNumber();
+ EndPhase(PHASE_VALUE_NUMBER);
+ }
- if (doCopyProp)
- {
- /* Perform VN based copy propagation */
- optVnCopyProp();
- EndPhase(PHASE_VN_COPY_PROP);
- }
+ if (doLoopHoisting)
+ {
+ /* Hoist invariant code out of loops */
+ optHoistLoopCode();
+ EndPhase(PHASE_HOIST_LOOP_CODE);
+ }
+
+ if (doCopyProp)
+ {
+ /* Perform VN based copy propagation */
+ optVnCopyProp();
+ EndPhase(PHASE_VN_COPY_PROP);
+ }
#if FEATURE_ANYCSE
- /* Remove common sub-expressions */
- optOptimizeCSEs();
+ /* Remove common sub-expressions */
+ optOptimizeCSEs();
#endif // FEATURE_ANYCSE
#if ASSERTION_PROP
- if (doAssertionProp)
- {
- /* Assertion propagation */
- optAssertionPropMain();
- EndPhase(PHASE_ASSERTION_PROP_MAIN);
- }
+ if (doAssertionProp)
+ {
+ /* Assertion propagation */
+ optAssertionPropMain();
+ EndPhase(PHASE_ASSERTION_PROP_MAIN);
+ }
- if (doRangeAnalysis)
- {
- /* Optimize array index range checks */
- RangeCheck rc(this);
- rc.OptimizeRangeChecks();
- EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
- }
+ if (doRangeAnalysis)
+ {
+ /* Optimize array index range checks */
+ RangeCheck rc(this);
+ rc.OptimizeRangeChecks();
+ EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
+ }
#endif // ASSERTION_PROP
- /* update the flowgraph if we modified it during the optimization phase*/
- if (fgModified)
- {
- fgUpdateFlowGraph();
- EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+        /* Update the flowgraph if we modified it during the optimization phase */
+ if (fgModified)
+ {
+ fgUpdateFlowGraph();
+ EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+
+ // Recompute the edge weight if we have modified the flow graph
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ }
- // Recompute the edge weight if we have modified the flow graph
- fgComputeEdgeWeights();
- EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ // Iterate if requested, resetting annotations first.
+ if (--iterations == 0)
+ {
+ break;
+ }
+ ResetOptAnnotations();
+ RecomputeLoopInfo();
}
}
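
The loop introduced above makes the global optimizer re-runnable: in DEBUG builds, when opts.optRepeat is selected via the JitOptRepeat method set (with JitOptRepeatCount supplying the iteration count), SSA construction through range-check optimization is executed repeatedly, and between iterations ResetOptAnnotations and RecomputeLoopInfo (both defined later in this file) clear and rebuild the per-iteration state. Note that the loop breaks before resetting on the final pass, so the last iteration's annotations survive into code generation. A self-contained sketch of just that control flow, with callbacks standing in for the real phase and reset routines (not JIT code):

    #include <functional>

    // Toy driver mirroring the repeat/reset structure of the loop above.
    void runOptimizations(int repeatCount,
                          const std::function<void()>& runPhases,
                          const std::function<void()>& resetAnnotations,
                          const std::function<void()>& recomputeLoops)
    {
        int iterations = (repeatCount > 0) ? repeatCount : 1;
        while (iterations > 0)
        {
            runPhases(); // SSA build, early prop, VN, CSE, assertion prop, ...
            if (--iterations == 0)
            {
                break; // last pass: leave annotations in place for codegen
            }
            resetAnnotations(); // cf. Compiler::ResetOptAnnotations
            recomputeLoops();   // cf. Compiler::RecomputeLoopInfo
        }
    }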
@@ -4540,7 +4654,12 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
#ifdef FEATURE_JIT_METHOD_PERF
if (pCompJitTimer)
- pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary);
+ {
+#if MEASURE_CLRAPI_CALLS
+ EndPhase(PHASE_CLR_API);
+#endif
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, true);
+ }
#endif
RecordStateAtEndOfCompilation();
@@ -4569,6 +4688,82 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
#endif // FUNC_INFO_LOGGING
}
+//------------------------------------------------------------------------
+// ResetOptAnnotations: Clear annotations produced during global optimizations.
+//
+// Notes:
+// The intent of this method is to clear any information typically assumed
+// to be set only once; it is used between iterations when JitOptRepeat is
+// in effect.
+
+void Compiler::ResetOptAnnotations()
+{
+ assert(opts.optRepeat);
+ assert(JitConfig.JitOptRepeatCount() > 0);
+ fgResetForSsa();
+ vnStore = nullptr;
+ m_opAsgnVarDefSsaNums = nullptr;
+ m_blockToEHPreds = nullptr;
+ fgSsaPassesCompleted = 0;
+ fgVNPassesCompleted = 0;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ stmt->gtFlags &= ~GTF_STMT_HAS_CSE;
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
+ {
+ tree->ClearVN();
+ tree->ClearAssertion();
+ tree->gtCSEnum = NO_CSE;
+
+ // Clear any *_ASG_LHS flags -- these are set during SSA construction,
+ // and the heap live-in calculation depends on them being unset coming
+ // into SSA construction (without clearing them, a block that has a
+ // heap def via one of these before any heap use is treated as not having
+ // an upwards-exposed heap use, even though subsequent heap uses may not
+ // be killed by the store; this seems to be a bug, worked around here).
+ if (tree->OperIsIndir())
+ {
+ tree->gtFlags &= ~GTF_IND_ASG_LHS;
+ }
+ else if (tree->OperGet() == GT_CLS_VAR)
+ {
+ tree->gtFlags &= ~GTF_CLS_VAR_ASG_LHS;
+ }
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// RecomputeLoopInfo: Recompute loop annotations between opt-repeat iterations.
+//
+// Notes:
+// The intent of this method is to update loop structure annotations, and those
+// they depend on; these annotations may have become stale during optimization,
+// and need to be up-to-date before running another iteration of optimizations.
+
+void Compiler::RecomputeLoopInfo()
+{
+ assert(opts.optRepeat);
+ assert(JitConfig.JitOptRepeatCount() > 0);
+ // Recompute reachability sets, dominators, and loops.
+ optLoopCount = 0;
+ fgDomsComputed = false;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_LOOP_FLAGS;
+ }
+ fgComputeReachability();
+ // Rebuild the loop tree annotations themselves. Since this is performed as
+ // part of 'optOptimizeLoops', this will also re-perform loop rotation, but
+ // not other optimizations, as the others are not part of 'optOptimizeLoops'.
+ optOptimizeLoops();
+}
+
/*****************************************************************************/
void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
{
@@ -4696,11 +4891,13 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags)
+ JitFlags* compileFlags)
{
#ifdef FEATURE_JIT_METHOD_PERF
static bool checkedForJitTimeLog = false;
+ pCompJitTimer = nullptr;
+
if (!checkedForJitTimeLog)
{
// Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
@@ -4713,14 +4910,10 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
checkedForJitTimeLog = true;
}
- if ((Compiler::compJitTimeLogFilename != NULL) || (JitTimeLogCsv() != NULL))
+ if ((Compiler::compJitTimeLogFilename != nullptr) || (JitTimeLogCsv() != nullptr))
{
pCompJitTimer = JitTimer::Create(this, methodInfo->ILCodeSize);
}
- else
- {
- pCompJitTimer = NULL;
- }
#endif // FEATURE_JIT_METHOD_PERF
#ifdef DEBUG
@@ -4862,7 +5055,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
// Set this before the first 'BADCODE'
// Skip verification where possible
- tiVerificationNeeded = (compileFlags->corJitFlags & CORJIT_FLG_SKIP_VERIFICATION) == 0;
+ tiVerificationNeeded = !compileFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION);
assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified.
@@ -4893,8 +5086,8 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
case CORINFO_VERIFICATION_CAN_SKIP:
// The VM should first verify the open instantiation. If unverifiable code
- // is detected, it should pass in CORJIT_FLG_SKIP_VERIFICATION.
- assert(!"The VM should have used CORJIT_FLG_SKIP_VERIFICATION");
+ // is detected, it should pass in JitFlags::JIT_FLAG_SKIP_VERIFICATION.
+ assert(!"The VM should have used JitFlags::JIT_FLAG_SKIP_VERIFICATION");
tiVerificationNeeded = false;
break;
@@ -4933,7 +5126,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo;
void** methodCodePtr;
ULONG* methodCodeSize;
- CORJIT_FLAGS* compileFlags;
+ JitFlags* compileFlags;
CorInfoInstantiationVerification instVerInfo;
int result;
@@ -5000,6 +5193,8 @@ void Compiler::compCompileFinish()
// Make the updates.
genMemStats.nraTotalSizeAlloc = compGetAllocator()->getTotalBytesAllocated();
genMemStats.nraTotalSizeUsed = compGetAllocator()->getTotalBytesUsed();
+ memAllocHist.record((unsigned)((genMemStats.nraTotalSizeAlloc + 1023) / 1024));
+ memUsedHist.record((unsigned)((genMemStats.nraTotalSizeUsed + 1023) / 1024));
s_aggMemStats.Add(genMemStats);
if (genMemStats.allocSz > s_maxCompMemStats.allocSz)
{
@@ -5038,6 +5233,7 @@ void Compiler::compCompileFinish()
// the prolog which requires memory
(info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
(getJitStressLevel() == 0) && // We need extra memory for stress
+ !opts.optRepeat && // We need extra memory to repeat opts
!compAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for
// DirectAlloc
(compAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
@@ -5071,7 +5267,7 @@ void Compiler::compCompileFinish()
mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
unsigned profCallCount = 0;
- if (((opts.eeFlags & CORJIT_FLG_BBOPT) != 0) && fgHaveProfileData())
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData())
{
assert(fgProfileBuffer[0].ILOffset == 0);
profCallCount = fgProfileBuffer[0].ExecutionCount;
@@ -5208,7 +5404,7 @@ void Compiler::compCompileFinish()
// For ngen the int3 or breakpoint instruction will be right at the
// start of the ngen method and we will stop when we execute it.
//
- if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if (compJitHaltMethod())
{
@@ -5296,7 +5492,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
CorInfoInstantiationVerification instVerInfo)
{
CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
@@ -5438,7 +5634,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
info.compIsContextful = (info.compClassAttr & CORINFO_FLG_CONTEXTFUL) != 0;
- info.compPublishStubParam = (opts.eeFlags & CORJIT_FLG_PUBLISH_SECRET_PARAM) != 0;
+ info.compPublishStubParam = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM);
switch (methodInfo->args.getCallConv())
{
@@ -5476,7 +5672,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
- if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT))
+ if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
// We're prejitting the root method. We also will analyze it as
// a potential inline candidate.
@@ -5644,10 +5840,6 @@ _Next:
return CORJIT_OK;
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************/
-
//------------------------------------------------------------------------
// compFindLocalVarLinear: Linear search for variable's scope containing offset.
//
@@ -5992,11 +6184,7 @@ void Compiler::compProcessScopesUntil(unsigned offset,
} while (foundExit || foundEnter);
}
-/*****************************************************************************/
-#endif // DEBUGGING_SUPPORT
-/*****************************************************************************/
-
-#if defined(DEBUGGING_SUPPORT) && defined(DEBUG)
+#if defined(DEBUG)
void Compiler::compDispScopeLists()
{
@@ -6044,10 +6232,6 @@ void Compiler::compDispScopeLists()
}
}
-#endif
-
-#if defined(DEBUG)
-
void Compiler::compDispLocalVars()
{
printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
@@ -6066,7 +6250,66 @@ void Compiler::compDispLocalVars()
}
}
-#endif
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#if MEASURE_CLRAPI_CALLS
+
+struct WrapICorJitInfo : public ICorJitInfo
+{
+ //------------------------------------------------------------------------
+ // WrapICorJitInfo::makeOne: allocate an instance of WrapICorJitInfo
+ //
+ // Arguments:
+ // alloc - the allocator to get memory from for the instance
+ // compile - the compiler instance
+ // compHndRef - the ICorJitInfo handle from the EE; the caller's
+ // copy may be replaced with a "wrapper" instance
+ //
+ // Return Value:
+ // If the config flags indicate that ICorJitInfo should be wrapped,
+ // we return the "wrapper" instance; otherwise we return "nullptr".
+
+ static WrapICorJitInfo* makeOne(ArenaAllocator* alloc, Compiler* compiler, COMP_HANDLE& compHndRef /* INOUT */)
+ {
+ WrapICorJitInfo* wrap = nullptr;
+
+ if (JitConfig.JitEECallTimingInfo() != 0)
+ {
+ // It's too early to use the default allocator, so we do this
+ // in two steps to be safe (the constructor doesn't need to do
+ // anything except fill in the vtable pointer, so we let the
+ // compiler do it).
+ void* inst = alloc->allocateMemory(roundUp(sizeof(WrapICorJitInfo)));
+ if (inst != nullptr)
+ {
+ // If you get a build error here due to 'WrapICorJitInfo' being
+ // an abstract class, it's very likely that the wrapper bodies
+ // in ICorJitInfo_API_wrapper.hpp are no longer in sync with
+ // the EE interface; please be kind and update the header file.
+ wrap = new (inst, jitstd::placement_t()) WrapICorJitInfo();
+
+ wrap->wrapComp = compiler;
+
+ // Save the real handle and replace it with our wrapped version.
+ wrap->wrapHnd = compHndRef;
+ compHndRef = wrap;
+ }
+ }
+
+ return wrap;
+ }
+
+private:
+ Compiler* wrapComp;
+ COMP_HANDLE wrapHnd; // the "real thing"
+
+public:
+#include "ICorJitInfo_API_wrapper.hpp"
+};
+
+#endif // MEASURE_CLRAPI_CALLS
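
makeOne only interposes the wrapper when JitEECallTimingInfo is enabled: it allocates a WrapICorJitInfo from the arena, records the real EE handle in wrapHnd, and overwrites the caller's handle so that every subsequent ICorJitInfo call flows through the wrapper, whose method bodies are pulled in from ICorJitInfo_API_wrapper.hpp (not shown here). The same interpose-and-time pattern, reduced to a toy interface so it stays self-contained (none of these names are CoreCLR types):

    #include <chrono>

    struct ICallee
    {
        virtual ~ICallee() = default;
        virtual int doWork(int arg) = 0;
    };

    // Forwards every call to the wrapped instance and accumulates its duration,
    // analogous to how the generated WrapICorJitInfo bodies bracket each EE call
    // with the CLRApiCallEnter/CLRApiCallLeave hooks defined later in this file.
    struct TimingWrapper : ICallee
    {
        ICallee*                 real;     // the "real thing", like wrapHnd above
        std::chrono::nanoseconds total{0};

        explicit TimingWrapper(ICallee* r) : real(r)
        {
        }

        int doWork(int arg) override
        {
            auto start  = std::chrono::steady_clock::now();
            int  result = real->doWork(arg);
            total += std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - start);
            return result;
        }
    };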
/*****************************************************************************/
@@ -6078,7 +6321,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
void* inlineInfoPtr)
{
//
@@ -6093,6 +6336,10 @@ START:
ArenaAllocator* pAlloc = nullptr;
ArenaAllocator alloc;
+#if MEASURE_CLRAPI_CALLS
+ WrapICorJitInfo* wrapCLR = nullptr;
+#endif
+
if (inlineInfo)
{
// Use inliner's memory allocator when compiling the inlinee.
@@ -6128,8 +6375,11 @@ START:
CORINFO_METHOD_INFO* methodInfo;
void** methodCodePtr;
ULONG* methodCodeSize;
- CORJIT_FLAGS* compileFlags;
+ JitFlags* compileFlags;
InlineInfo* inlineInfo;
+#if MEASURE_CLRAPI_CALLS
+ WrapICorJitInfo* wrapCLR;
+#endif
int result;
} param;
@@ -6145,7 +6395,10 @@ START:
param.methodCodeSize = methodCodeSize;
param.compileFlags = compileFlags;
param.inlineInfo = inlineInfo;
- param.result = result;
+#if MEASURE_CLRAPI_CALLS
+ param.wrapCLR = nullptr;
+#endif
+ param.result = result;
setErrorTrap(compHnd, Param*, pParamOuter, &param)
{
@@ -6172,6 +6425,10 @@ START:
pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
}
+#if MEASURE_CLRAPI_CALLS
+ pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd);
+#endif
+
// push this compiler on the stack (TLS)
pParam->pComp->prevCompiler = JitTls::GetCompiler();
JitTls::SetCompiler(pParam->pComp);
@@ -6238,8 +6495,9 @@ START:
jitFallbackCompile = true;
// Update the flags for 'safer' code generation.
- compileFlags->corJitFlags |= CORJIT_FLG_MIN_OPT;
- compileFlags->corJitFlags &= ~(CORJIT_FLG_SIZE_OPT | CORJIT_FLG_SPEED_OPT);
+ compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT);
+ compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT);
+ compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT);
goto START;
}
@@ -6952,9 +7210,12 @@ void Compiler::compDispCallArgStats(FILE* fout)
// Static variables
CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
+#if MEASURE_CLRAPI_CALLS
+double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond();
+#endif
#endif // FEATURE_JIT_METHOD_PERF
-#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
const char* PhaseNames[] = {
#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
#include "compphases.h"
@@ -6983,13 +7244,36 @@ int PhaseParent[] = {
};
CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
- : m_byteCodeBytes(byteCodeBytes), m_totalCycles(0), m_parentPhaseEndSlop(0), m_timerFailure(false)
+ : m_byteCodeBytes(byteCodeBytes)
+ , m_totalCycles(0)
+ , m_parentPhaseEndSlop(0)
+ , m_timerFailure(false)
+#if MEASURE_CLRAPI_CALLS
+ , m_allClrAPIcalls(0)
+ , m_allClrAPIcycles(0)
+#endif
{
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
m_invokesByPhase[i] = 0;
m_cyclesByPhase[i] = 0;
+#if MEASURE_CLRAPI_CALLS
+ m_CLRinvokesByPhase[i] = 0;
+ m_CLRcyclesByPhase[i] = 0;
+#endif
}
+
+#if MEASURE_CLRAPI_CALLS
+ assert(ARRAYSIZE(m_perClrAPIcalls) == API_ICorJitInfo_Names::API_COUNT);
+ assert(ARRAYSIZE(m_perClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
+ assert(ARRAYSIZE(m_maxClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT);
+ for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ m_perClrAPIcalls[i] = 0;
+ m_perClrAPIcycles[i] = 0;
+ m_maxClrAPIcycles[i] = 0;
+ }
+#endif
}
bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
@@ -6997,52 +7281,125 @@ bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
return false; // info.m_byteCodeBytes < 10;
}
-void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info)
+//------------------------------------------------------------------------
+// CompTimeSummaryInfo::AddInfo: Record timing info from one compile.
+//
+// Arguments:
+// info - The timing information to record.
+// includePhases - If "true", the per-phase info in "info" is valid,
+// which means that a "normal" compile has ended; if
+// the value is "false" we are recording the results
+// of a partial compile (typically an import-only run
+// on behalf of the inliner) in which case the phase
+// info is not valid and so we only record EE call
+// overhead.
+void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info, bool includePhases)
{
if (info.m_timerFailure)
+ {
return; // Don't update if there was a failure.
+ }
CritSecHolder timeLock(s_compTimeSummaryLock);
- m_numMethods++;
- bool includeInFiltered = IncludedInFilteredData(info);
+ if (includePhases)
+ {
+ bool includeInFiltered = IncludedInFilteredData(info);
- // Update the totals and maxima.
- m_total.m_byteCodeBytes += info.m_byteCodeBytes;
- m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
- m_total.m_totalCycles += info.m_totalCycles;
- m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+ m_numMethods++;
- if (includeInFiltered)
- {
- m_numFilteredMethods++;
- m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
- m_filtered.m_totalCycles += info.m_totalCycles;
- m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
- }
+ // Update the totals and maxima.
+ m_total.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
+ m_total.m_totalCycles += info.m_totalCycles;
+ m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+
+#if MEASURE_CLRAPI_CALLS
+ // Update the CLR-API values.
+ m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
+ m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
+ m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
+ m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
+#endif
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
- {
- m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
- m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
if (includeInFiltered)
{
- m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
- m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ m_numFilteredMethods++;
+ m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_filtered.m_totalCycles += info.m_totalCycles;
+ m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ }
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+
+#if MEASURE_CLRAPI_CALLS
+ m_total.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
+ m_total.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
+#endif
+
+ if (includeInFiltered)
+ {
+ m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+#if MEASURE_CLRAPI_CALLS
+ m_filtered.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i];
+ m_filtered.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i];
+#endif
+ }
+ m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+
+#if MEASURE_CLRAPI_CALLS
+ m_maximum.m_CLRcyclesByPhase[i] = max(m_maximum.m_CLRcyclesByPhase[i], info.m_CLRcyclesByPhase[i]);
+#endif
}
- m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+ m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+ }
+#if MEASURE_CLRAPI_CALLS
+ else
+ {
+ m_totMethods++;
+
+ // Update the "global" CLR-API values.
+ m_total.m_allClrAPIcalls += info.m_allClrAPIcalls;
+ m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls);
+ m_total.m_allClrAPIcycles += info.m_allClrAPIcycles;
+ m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles);
+
+ // Update the per-phase CLR-API values.
+ m_total.m_invokesByPhase[PHASE_CLR_API] += info.m_allClrAPIcalls;
+ m_maximum.m_invokesByPhase[PHASE_CLR_API] =
+ max(m_maximum.m_perClrAPIcalls[PHASE_CLR_API], info.m_allClrAPIcalls);
+ m_total.m_cyclesByPhase[PHASE_CLR_API] += info.m_allClrAPIcycles;
+ m_maximum.m_cyclesByPhase[PHASE_CLR_API] =
+ max(m_maximum.m_cyclesByPhase[PHASE_CLR_API], info.m_allClrAPIcycles);
+ }
+
+ for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ m_total.m_perClrAPIcalls[i] += info.m_perClrAPIcalls[i];
+ m_maximum.m_perClrAPIcalls[i] = max(m_maximum.m_perClrAPIcalls[i], info.m_perClrAPIcalls[i]);
+
+ m_total.m_perClrAPIcycles[i] += info.m_perClrAPIcycles[i];
+ m_maximum.m_perClrAPIcycles[i] = max(m_maximum.m_perClrAPIcycles[i], info.m_perClrAPIcycles[i]);
+
+ m_maximum.m_maxClrAPIcycles[i] = max(m_maximum.m_maxClrAPIcycles[i], info.m_maxClrAPIcycles[i]);
}
- m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
- m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+#endif
}
// Static
-LPCWSTR Compiler::compJitTimeLogFilename = NULL;
+LPCWSTR Compiler::compJitTimeLogFilename = nullptr;
void CompTimeSummaryInfo::Print(FILE* f)
{
- if (f == NULL)
+ if (f == nullptr)
+ {
return;
+ }
// Otherwise...
double countsPerSec = CycleTimer::CyclesPerSecond();
if (countsPerSec == 0.0)
@@ -7051,13 +7408,16 @@ void CompTimeSummaryInfo::Print(FILE* f)
return;
}
+ bool extraInfo = (JitConfig.JitEECallTimingInfo() != 0);
+ double totTime_ms = 0.0;
+
fprintf(f, "JIT Compilation time report:\n");
fprintf(f, " Compiled %d methods.\n", m_numMethods);
if (m_numMethods != 0)
{
fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
- double totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
+ totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
totTime_ms);
fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
@@ -7065,15 +7425,36 @@ void CompTimeSummaryInfo::Print(FILE* f)
fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
- fprintf(f, " Total time by phases:\n");
- fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)\n");
- fprintf(f, " --------------------------------------------------------------------------------------\n");
+ const char* extraHdr1 = "";
+ const char* extraHdr2 = "";
+#if MEASURE_CLRAPI_CALLS
+ if (extraInfo)
+ {
+ extraHdr1 = " CLRs/meth % in CLR";
+ extraHdr2 = "-----------------------";
+ }
+#endif
+
+ fprintf(f, "\n Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)%s\n",
+ extraHdr1);
+ fprintf(f, " ---------------------------------------------------------------------------------------%s\n",
+ extraHdr2);
+
// Ensure that at least the names array and the Phases enum have the same number of entries:
assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
- double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
- double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_tot_pct = 100.0 * phase_tot_ms / totTime_ms;
+
+#if MEASURE_CLRAPI_CALLS
+ // Skip showing CLR API call info if we didn't collect any
+ if (i == PHASE_CLR_API && !extraInfo)
+ continue;
+#endif
+
// Indent nested phases, according to depth.
int ancPhase = PhaseParent[i];
while (ancPhase != -1)
@@ -7081,13 +7462,33 @@ void CompTimeSummaryInfo::Print(FILE* f)
fprintf(f, " ");
ancPhase = PhaseParent[ancPhase];
}
- fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%% %8.3f\n", PhaseNames[i],
+ fprintf(f, " %-30s %6.2f %10.2f %9.3f %8.2f%% %8.3f", PhaseNames[i],
((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
phase_max_ms);
+
+#if MEASURE_CLRAPI_CALLS
+ if (extraInfo && i != PHASE_CLR_API)
+ {
+ double nest_tot_ms = (((double)m_total.m_CLRcyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double nest_percent = nest_tot_ms * 100.0 / totTime_ms;
+ double calls_per_fn = ((double)m_total.m_CLRinvokesByPhase[i]) / ((double)m_numMethods);
+
+ if (nest_percent > 0.1 || calls_per_fn > 10)
+ fprintf(f, " %5.1f %8.2f%%", calls_per_fn, nest_percent);
+ }
+#endif
+ fprintf(f, "\n");
+ }
+
+ // Show slop if it's over a certain percentage of the total
+ double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms;
+ if (pslop_pct >= 1.0)
+ {
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = "
+ "%3.1f%% of total.\n\n",
+ m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct);
}
- fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
- m_total.m_parentPhaseEndSlop);
}
if (m_numFilteredMethods > 0)
{
@@ -7121,19 +7522,125 @@ void CompTimeSummaryInfo::Print(FILE* f)
((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
(phase_tot_ms * 100.0 / totTime_ms));
}
- fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
- m_filtered.m_parentPhaseEndSlop);
+
+ double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec;
+ if (fslop_ms > 1.0)
+ {
+ fprintf(f,
+ "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+ m_filtered.m_parentPhaseEndSlop);
+ }
}
+
+#if MEASURE_CLRAPI_CALLS
+ if (m_total.m_allClrAPIcalls > 0 && m_total.m_allClrAPIcycles > 0)
+ {
+ fprintf(f, "\n");
+ if (m_totMethods > 0)
+ fprintf(f, " Imported %u methods.\n\n", m_numMethods + m_totMethods);
+
+ fprintf(f, " CLR API # calls total time max time avg time %% "
+ "of total\n");
+ fprintf(f, " -------------------------------------------------------------------------------");
+ fprintf(f, "---------------------\n");
+
+ static const char* APInames[] = {
+#define DEF_CLR_API(name) #name,
+#include "ICorJitInfo_API_names.h"
+ };
+
+ unsigned shownCalls = 0;
+ double shownMillis = 0.0;
+#ifdef DEBUG
+ unsigned checkedCalls = 0;
+ double checkedMillis = 0.0;
+#endif
+
+ for (unsigned pass = 0; pass < 2; pass++)
+ {
+ for (unsigned i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++)
+ {
+ unsigned calls = m_total.m_perClrAPIcalls[i];
+ if (calls == 0)
+ continue;
+
+ unsigned __int64 cycles = m_total.m_perClrAPIcycles[i];
+ double millis = 1000.0 * cycles / countsPerSec;
+
+ // Don't show the small fry to keep the results manageable
+ if (millis < 0.5)
+ {
+ // We always show the following API because it is always called
+ // exactly once for each method and its body is the simplest one
+ // possible (it just returns an integer constant), and therefore
+ // it can be used to measure the overhead of adding the CLR API
+ // timing code. Roughly speaking, on a 3GHz x64 box the overhead
+ // per call should be around 40 ns when using RDTSC, compared to
+ // about 140 ns when using GetThreadCycles() under Windows.
+ if (i != API_ICorJitInfo_Names::API_getExpectedTargetArchitecture)
+ continue;
+ }
+
+ // In the first pass we just compute the totals.
+ if (pass == 0)
+ {
+ shownCalls += m_total.m_perClrAPIcalls[i];
+ shownMillis += millis;
+ continue;
+ }
+
+ unsigned __int32 maxcyc = m_maximum.m_maxClrAPIcycles[i];
+ double max_ms = 1000.0 * maxcyc / countsPerSec;
+
+ fprintf(f, " %-40s", APInames[i]); // API name
+ fprintf(f, " %8u %9.1f ms", calls, millis); // #calls, total time
+ fprintf(f, " %8.1f ms %8.1f ns", max_ms, 1000000.0 * millis / calls); // max, avg time
+ fprintf(f, " %5.1f%%\n", 100.0 * millis / shownMillis); // % of total
+
+#ifdef DEBUG
+ checkedCalls += m_total.m_perClrAPIcalls[i];
+ checkedMillis += millis;
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ assert(checkedCalls == shownCalls);
+ assert(checkedMillis == shownMillis);
+#endif
+
+ if (shownCalls > 0 || shownMillis > 0)
+ {
+ fprintf(f, " -------------------------");
+ fprintf(f, "---------------------------------------------------------------------------\n");
+ fprintf(f, " Total for calls shown above %8u %10.1f ms", shownCalls, shownMillis);
+ if (totTime_ms > 0.0)
+ fprintf(f, " (%4.1lf%% of overall JIT time)", shownMillis * 100.0 / totTime_ms);
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n");
+ }
+#endif
+
+ fprintf(f, "\n");
}
JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
{
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallInvokes = 0;
+ m_CLRcallCycles = 0;
+#endif
+
#ifdef DEBUG
m_lastPhase = (Phases)-1;
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallAPInum = -1;
+#endif
#endif
unsigned __int64 threadCurCycles;
- if (GetThreadCycles(&threadCurCycles))
+ if (_our_GetThreadCycles(&threadCurCycles))
{
m_start = threadCurCycles;
m_curPhaseStart = threadCurCycles;
@@ -7147,9 +7654,10 @@ void JitTimer::EndPhase(Phases phase)
// assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
unsigned __int64 threadCurCycles;
- if (GetThreadCycles(&threadCurCycles))
+ if (_our_GetThreadCycles(&threadCurCycles))
{
unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
+
// If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
// Credit the duration to "slop", the total of which should be very small.
if (PhaseHasChildren[phase])
@@ -7161,6 +7669,13 @@ void JitTimer::EndPhase(Phases phase)
// It is a leaf phase. Credit duration to it.
m_info.m_invokesByPhase[phase]++;
m_info.m_cyclesByPhase[phase] += phaseCycles;
+
+#if MEASURE_CLRAPI_CALLS
+ // Record the CLR API timing info as well.
+ m_info.m_CLRinvokesByPhase[phase] += m_CLRcallInvokes;
+ m_info.m_CLRcyclesByPhase[phase] += m_CLRcallCycles;
+#endif
+
// Credit the phase's ancestors, if any.
int ancPhase = PhaseParent[phase];
while (ancPhase != -1)
@@ -7168,8 +7683,13 @@ void JitTimer::EndPhase(Phases phase)
m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
ancPhase = PhaseParent[ancPhase];
}
- // Did we just end the last phase?
- if (phase + 1 == PHASE_NUMBER_OF)
+
+#if MEASURE_CLRAPI_CALLS
+ const Phases lastPhase = PHASE_CLR_API;
+#else
+ const Phases lastPhase = PHASE_NUMBER_OF;
+#endif
+ if (phase + 1 == lastPhase)
{
m_info.m_totalCycles = (threadCurCycles - m_start);
}
@@ -7179,11 +7699,92 @@ void JitTimer::EndPhase(Phases phase)
}
}
}
+
#ifdef DEBUG
m_lastPhase = phase;
#endif
+#if MEASURE_CLRAPI_CALLS
+ m_CLRcallInvokes = 0;
+ m_CLRcallCycles = 0;
+#endif
+}
+
+#if MEASURE_CLRAPI_CALLS
+
+//------------------------------------------------------------------------
+// JitTimer::CLRApiCallEnter: Start the stopwatch for an EE call.
+//
+// Arguments:
+// apix - The API index - an "enum API_ICorJitInfo_Names" value.
+//
+
+void JitTimer::CLRApiCallEnter(unsigned apix)
+{
+ assert(m_CLRcallAPInum == -1); // Nested calls not allowed
+ m_CLRcallAPInum = apix;
+
+ // If we can't get the cycles, we'll just ignore this call
+ if (!_our_GetThreadCycles(&m_CLRcallStart))
+ m_CLRcallStart = 0;
+}
+
+//------------------------------------------------------------------------
+// JitTimer::CLRApiCallLeave: compute / record time spent in an EE call.
+//
+// Arguments:
+// apix - The API's "enum API_ICorJitInfo_Names" value; this value
+// should match the value passed to the most recent call to
+// "CLRApiCallEnter" (i.e. these must come as matched pairs),
+// and they also may not nest.
+//
+
+void JitTimer::CLRApiCallLeave(unsigned apix)
+{
+ // Make sure we're actually inside a measured CLR call.
+ assert(m_CLRcallAPInum != -1);
+ m_CLRcallAPInum = -1;
+
+ // Ignore this one if we don't have a valid starting counter.
+ if (m_CLRcallStart != 0)
+ {
+ if (JitConfig.JitEECallTimingInfo() != 0)
+ {
+ unsigned __int64 threadCurCycles;
+ if (_our_GetThreadCycles(&threadCurCycles))
+ {
+ // Compute the cycles spent in the call.
+ threadCurCycles -= m_CLRcallStart;
+
+ // Add the cycles to the 'phase' and bump its use count.
+ m_info.m_cyclesByPhase[PHASE_CLR_API] += threadCurCycles;
+ m_info.m_invokesByPhase[PHASE_CLR_API] += 1;
+
+ // Add the values to the "per API" info.
+ m_info.m_allClrAPIcycles += threadCurCycles;
+ m_info.m_allClrAPIcalls += 1;
+
+ m_info.m_perClrAPIcalls[apix] += 1;
+ m_info.m_perClrAPIcycles[apix] += threadCurCycles;
+ m_info.m_maxClrAPIcycles[apix] = max(m_info.m_maxClrAPIcycles[apix], (unsigned __int32)threadCurCycles);
+
+ // Subtract the cycles from the enclosing phase by bumping its start time
+ m_curPhaseStart += threadCurCycles;
+
+ // Update the running totals.
+ m_CLRcallInvokes += 1;
+ m_CLRcallCycles += threadCurCycles;
+ }
+ }
+
+ m_CLRcallStart = 0;
+ }
+
+    // No longer in this API call.
+    m_CLRcallAPInum = -1;
}
+#endif // MEASURE_CLRAPI_CALLS
+
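To make the accounting in CLRApiCallLeave concrete, here is a small worked example of how bumping m_curPhaseStart keeps EE time out of the enclosing phase (all cycle values are invented):

    // A phase starts with m_curPhaseStart = 1000; an EE call inside it runs from
    // cycle 1200 to 1500 (300 cycles); the phase ends at cycle 2000.
    //
    //   m_info.m_cyclesByPhase[PHASE_CLR_API] += 300   // credited in CLRApiCallLeave
    //   m_curPhaseStart = 1000 + 300 = 1300            // "bumping its start time"
    //   phaseCycles     = 2000 - 1300 = 700            // what EndPhase later records
    //
    // The enclosing phase is charged only the 700 cycles spent outside the EE,
    // and the 300 EE cycles appear exactly once, under PHASE_CLR_API.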
CritSecObject JitTimer::s_csvLock;
LPCWSTR Compiler::JitTimeLogCsv()
@@ -7195,39 +7796,38 @@ LPCWSTR Compiler::JitTimeLogCsv()
void JitTimer::PrintCsvHeader()
{
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
- if (jitTimeLogCsv == NULL)
+ if (jitTimeLogCsv == nullptr)
{
return;
}
CritSecHolder csvLock(s_csvLock);
- FILE* fp = _wfopen(jitTimeLogCsv, W("r"));
- if (fp == nullptr)
+ FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
+ if (fp != nullptr)
{
- // File doesn't exist, so create it and write the header
-
- // Use write mode, so we rewrite the file, and retain only the last compiled process/dll.
- // Ex: ngen install mscorlib won't print stats for "ngen" but for "mscorsvw"
- FILE* fp = _wfopen(jitTimeLogCsv, W("w"));
- fprintf(fp, "\"Method Name\",");
- fprintf(fp, "\"Method Index\",");
- fprintf(fp, "\"IL Bytes\",");
- fprintf(fp, "\"Basic Blocks\",");
- fprintf(fp, "\"Opt Level\",");
- fprintf(fp, "\"Loops Cloned\",");
-
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ // Write the header if the file is empty
+ if (ftell(fp) == 0)
{
- fprintf(fp, "\"%s\",", PhaseNames[i]);
- }
+ fprintf(fp, "\"Method Name\",");
+ fprintf(fp, "\"Method Index\",");
+ fprintf(fp, "\"IL Bytes\",");
+ fprintf(fp, "\"Basic Blocks\",");
+ fprintf(fp, "\"Opt Level\",");
+ fprintf(fp, "\"Loops Cloned\",");
- InlineStrategy::DumpCsvHeader(fp);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ fprintf(fp, "\"%s\",", PhaseNames[i]);
+ }
- fprintf(fp, "\"Total Cycles\",");
- fprintf(fp, "\"CPS\"\n");
+ InlineStrategy::DumpCsvHeader(fp);
+
+ fprintf(fp, "\"Total Cycles\",");
+ fprintf(fp, "\"CPS\"\n");
+ }
+ fclose(fp);
}
- fclose(fp);
}
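The rewritten PrintCsvHeader switches from truncate-and-rewrite to append mode, emitting the header only when ftell reports an empty file. A minimal standalone sketch of the same pattern; the helper name, path, and column strings are hypothetical:

    #include <stdio.h>

    void AppendCsvRow(const char* path, const char* headerLine, const char* rowLine)
    {
        FILE* fp = fopen(path, "a"); // create the file if missing, never truncate existing data
        if (fp == nullptr)
        {
            return;
        }
        if (ftell(fp) == 0) // an empty file has no header yet
        {
            fputs(headerLine, fp);
        }
        fputs(rowLine, fp);
        fclose(fp);
    }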
extern ICorJitHost* g_jitHost;
@@ -7235,7 +7835,7 @@ extern ICorJitHost* g_jitHost;
void JitTimer::PrintCsvMethodStats(Compiler* comp)
{
LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
- if (jitTimeLogCsv == NULL)
+ if (jitTimeLogCsv == nullptr)
{
return;
}
@@ -7265,7 +7865,9 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
if (!PhaseHasChildren[i])
+ {
totCycles += m_info.m_cyclesByPhase[i];
+ }
fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
}
@@ -7277,23 +7879,14 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
}
// Completes the timing of the current method, and adds it to "sum".
-void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
+void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases)
{
-#ifdef DEBUG
- unsigned __int64 totCycles2 = 0;
- for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ if (includePhases)
{
- if (!PhaseHasChildren[i])
- totCycles2 += m_info.m_cyclesByPhase[i];
+ PrintCsvMethodStats(comp);
}
- // We include m_parentPhaseEndSlop in the next phase's time also (we probably shouldn't)
- // totCycles2 += m_info.m_parentPhaseEndSlop;
- assert(totCycles2 == m_info.m_totalCycles);
-#endif
-
- PrintCsvMethodStats(comp);
- sum.AddInfo(m_info);
+ sum.AddInfo(m_info, includePhases);
}
#endif // FEATURE_JIT_METHOD_PERF
@@ -7331,6 +7924,10 @@ void Compiler::MemStats::PrintByKind(FILE* f)
void Compiler::AggregateMemStats::Print(FILE* f)
{
fprintf(f, "For %9u methods:\n", nMethods);
+ if (nMethods == 0)
+ {
+ return;
+ }
fprintf(f, " count: %12u (avg %7u per method)\n", allocCnt, allocCnt / nMethods);
fprintf(f, " alloc size : %12llu (avg %7llu per method)\n", allocSz, allocSz / nMethods);
fprintf(f, " max alloc : %12llu\n", allocSzMax);
@@ -8520,6 +9117,9 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
break;
case GT_MUL:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
if (tree->gtFlags & GTF_MUL_64RSLT)
{
@@ -10124,11 +10724,6 @@ void cNodeIR(Compiler* comp, GenTree* tree)
}
break;
- case GT_STORE_CLS_VAR:
-
- chars += printf(" ???");
- break;
-
case GT_LEA:
GenTreeAddrMode* lea = tree->AsAddrMode();
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 05047c5ecb..d8cd491063 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -691,12 +691,21 @@ public:
// is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct));
+
+#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+ // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
+ // this for arguments, which must be passed according the defined ABI.
+ if ((lvType == TYP_SIMD12) && !lvIsParam)
+ {
+ assert(lvExactSize == 12);
+ return 16;
+ }
+#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+
return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE));
}
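A short numeric illustration of the SIMD12 special case added above; the sizes follow directly from the code, while the "stays in bounds" rationale is an assumption consistent with the comment:

    //   TYP_SIMD12 local (not a param), 32-bit target : lvExactSize = 12 -> lvSize() reports 16,
    //       presumably so full 16-byte vector loads/stores of the local stay in bounds.
    //   TYP_SIMD12 parameter, 32-bit target           : 12 -> roundUp(12, TARGET_POINTER_SIZE = 4) = 12,
    //       preserving the ABI-defined layout.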
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
unsigned lvSlotNum; // original slot # (if remapped)
-#endif
typeInfo lvVerTypeInfo; // type info needed for verification
@@ -926,6 +935,14 @@ extern const char* PhaseNames[];
extern const char* PhaseEnums[];
extern const LPCWSTR PhaseShortNames[];
+// The following enum provides a simple 1:1 mapping to the CLR APIs
+enum API_ICorJitInfo_Names
+{
+#define DEF_CLR_API(name) API_##name,
+#include "ICorJitInfo_API_names.h"
+ API_COUNT
+};
+
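The enum above and the APInames[] table in the timing report earlier in this diff are both generated from the same X-macro header; a sketch of the expansion (the entry names are illustrative, not the actual contents of ICorJitInfo_API_names.h):

//   // ICorJitInfo_API_names.h, conceptually:
//   DEF_CLR_API(getMethodAttribs)
//   DEF_CLR_API(getExpectedTargetArchitecture)
//   ...
//
//   // With "#define DEF_CLR_API(name) API_##name," the include expands to the enum:
//   //     API_getMethodAttribs, API_getExpectedTargetArchitecture, ..., API_COUNT
//   // With "#define DEF_CLR_API(name) #name," (as in the report code) the same header
//   // yields the parallel APInames[] string table, so the two always stay in sync.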
//---------------------------------------------------------------
// Compilation time.
//
@@ -949,6 +966,10 @@ struct CompTimeInfo
unsigned __int64 m_totalCycles;
unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF];
unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF];
+#if MEASURE_CLRAPI_CALLS
+ unsigned __int64 m_CLRinvokesByPhase[PHASE_NUMBER_OF];
+ unsigned __int64 m_CLRcyclesByPhase[PHASE_NUMBER_OF];
+#endif
// For better documentation, we call EndPhase on
// non-leaf phases. We should also call EndPhase on the
// last leaf subphase; obviously, the elapsed cycles between the EndPhase
@@ -960,12 +981,25 @@ struct CompTimeInfo
unsigned __int64 m_parentPhaseEndSlop;
bool m_timerFailure;
+#if MEASURE_CLRAPI_CALLS
+ // The following measures the time spent inside each individual CLR API call.
+ unsigned m_allClrAPIcalls;
+ unsigned m_perClrAPIcalls[API_ICorJitInfo_Names::API_COUNT];
+ unsigned __int64 m_allClrAPIcycles;
+ unsigned __int64 m_perClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+ unsigned __int32 m_maxClrAPIcycles[API_ICorJitInfo_Names::API_COUNT];
+#endif // MEASURE_CLRAPI_CALLS
+
CompTimeInfo(unsigned byteCodeBytes);
#endif
};
#ifdef FEATURE_JIT_METHOD_PERF
+#if MEASURE_CLRAPI_CALLS
+struct WrapICorJitInfo;
+#endif
+
// This class summarizes the JIT time information over the course of a run: the number of methods compiled,
// and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above).
// The operation of adding a single method's timing to the summary may be performed concurrently by several
@@ -977,6 +1011,7 @@ class CompTimeSummaryInfo
static CritSecObject s_compTimeSummaryLock;
int m_numMethods;
+ int m_totMethods;
CompTimeInfo m_total;
CompTimeInfo m_maximum;
@@ -996,13 +1031,14 @@ public:
// This is the unique CompTimeSummaryInfo object for this instance of the runtime.
static CompTimeSummaryInfo s_compTimeSummary;
- CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
+ CompTimeSummaryInfo()
+ : m_numMethods(0), m_totMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
{
}
// Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary.
// This is thread safe.
- void AddInfo(CompTimeInfo& info);
+ void AddInfo(CompTimeInfo& info, bool includePhases);
// Print the summary information to "f".
// This is not thread-safe; assumed to be called by only one thread.
@@ -1017,6 +1053,13 @@ class JitTimer
{
unsigned __int64 m_start; // Start of the compilation.
unsigned __int64 m_curPhaseStart; // Start of the current phase.
+#if MEASURE_CLRAPI_CALLS
+ unsigned __int64 m_CLRcallStart; // Start of the current CLR API call (if any).
+    unsigned __int64 m_CLRcallInvokes; // Number of CLR API calls under the current (outer) phase so far.
+    unsigned __int64 m_CLRcallCycles;  // Cycles spent in CLR API calls under the current (outer) phase so far.
+ int m_CLRcallAPInum; // The enum/index of the current CLR API call (or -1).
+ static double s_cyclesPerSec; // Cached for speedier measurements
+#endif
#ifdef DEBUG
Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start).
#endif
@@ -1045,9 +1088,15 @@ public:
// Ends the current phase (argument is for a redundant check).
void EndPhase(Phases phase);
+#if MEASURE_CLRAPI_CALLS
+ // Start and end a timed CLR API call.
+ void CLRApiCallEnter(unsigned apix);
+ void CLRApiCallLeave(unsigned apix);
+#endif // MEASURE_CLRAPI_CALLS
+
// Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode,
// and adds it to "sum".
- void Terminate(Compiler* comp, CompTimeSummaryInfo& sum);
+ void Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases);
// Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets
// *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of
@@ -1164,7 +1213,13 @@ struct fgArgTabEntry
regNumber otherRegNum; // The (second) register to use when passing this argument.
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#elif defined(_TARGET_X86_)
+ __declspec(property(get = getIsStruct)) bool isStruct;
+ bool getIsStruct()
+ {
+ return varTypeIsStruct(node);
+ }
+#endif // _TARGET_X86_
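The x86 branch above uses the MSVC __declspec(property) extension so existing reads of the isStruct field keep compiling while the value is computed on demand. A minimal standalone sketch of that pattern; the struct, field, and predicate are hypothetical:

    struct Entry
    {
        int kind;

        __declspec(property(get = getIsStruct)) bool isStruct;
        bool getIsStruct()
        {
            return kind == 1; // any computed predicate
        }
    };

    // Reads like a data member but calls getIsStruct() under the covers:
    //   Entry e; e.kind = 1;
    //   bool b = e.isStruct; // b == true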
#ifdef _TARGET_ARM_
void SetIsHfaRegArg(bool hfaRegArg)
@@ -1293,6 +1348,10 @@ public:
{
return hasStackArgs;
}
+ bool AreArgsComplete() const
+ {
+ return argsComplete;
+ }
};
#ifdef DEBUG
@@ -1939,8 +1998,6 @@ public:
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);
- GenTreeArgList* gtNewAggregate(GenTree* element);
-
static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
@@ -1975,7 +2032,18 @@ public:
GenTreePtr gtClone(GenTree* tree, bool complexOK = false);
- GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0);
+ // If `tree` is a lclVar with lclNum `varNum`, return an IntCns with value `varVal`; otherwise,
+ // create a copy of `tree`, adding specified flags, replacing uses of lclVar `deepVarNum` with
+ // IntCnses with value `deepVarVal`.
+ GenTreePtr gtCloneExpr(
+ GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal);
+
+    // Create a copy of `tree`, optionally adding specified flags, and optionally mapping uses of local
+ // `varNum` to int constants with value `varVal`.
+ GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0)
+ {
+ return gtCloneExpr(tree, addFlags, varNum, varVal, varNum, varVal);
+ }
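Hypothetical call sites for the two gtCloneExpr forms above (the tree and local numbers are invented), assuming the semantics described in the comments:

    //   GenTreePtr copy1 = gtCloneExpr(expr);          // plain deep copy
    //   GenTreePtr copy2 = gtCloneExpr(expr, 0, 3, 7); // deep copy with every use of local V03
    //                                                  // replaced by the int constant 7
    //   // The six-argument form additionally lets the root-level (varNum/varVal) substitution
    //   // differ from the (deepVarNum/deepVarVal) substitution applied while recursing.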
GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree);
@@ -1997,7 +2065,7 @@ public:
unsigned gtHashValue(GenTree* tree);
- unsigned gtSetListOrder(GenTree* list, bool regs);
+ unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs);
void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
@@ -2277,7 +2345,8 @@ public:
DNER_VMNeedsStackAddr,
DNER_LiveInOutOfHandler,
DNER_LiveAcrossUnmanagedCall,
- DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+ DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+ DNER_IsStructArg, // Is a struct passed as an argument in a way that requires a stack location.
#ifdef JIT32_GCENCODER
DNER_PinningRef,
#endif
@@ -2439,7 +2508,6 @@ public:
void lvaInit();
- unsigned lvaArgSize(const void* argTok);
unsigned lvaLclSize(unsigned varNum);
unsigned lvaLclExactSize(unsigned varNum);
@@ -2712,9 +2780,10 @@ protected:
void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
- bool impCanPInvokeInline(var_types callRetTyp);
- bool impCanPInvokeInlineCallSite(var_types callRetTyp);
- void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags);
+ bool impCanPInvokeInline(BasicBlock* block);
+ bool impCanPInvokeInlineCallSite(BasicBlock* block);
+ void impCheckForPInvokeCall(
+ GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
@@ -2739,8 +2808,6 @@ protected:
GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);
- GenTreePtr impInitCallLongReturn(GenTreePtr call);
-
GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);
#ifdef DEBUG
@@ -2764,7 +2831,6 @@ protected:
void impImportLeave(BasicBlock* block);
void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
- BOOL impLocAllocOnStack();
GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
@@ -2868,6 +2934,8 @@ public:
unsigned flags,
void* compileTimeHandle);
+ GenTreePtr getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind);
+
GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
CORINFO_LOOKUP* pLookup,
void* compileTimeHandle);
@@ -3148,8 +3216,6 @@ private:
static LONG jitNestingLevel;
#endif // DEBUG
- bool seenConditionalJump;
-
static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut);
void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
@@ -3455,8 +3521,9 @@ public:
void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw);
void fgMorphBlocks();
- bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg));
+ bool fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg));
+ void fgCheckArgCnt();
void fgSetOptions();
#ifdef DEBUG
@@ -3845,7 +3912,7 @@ public:
//
var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
structPassingKind* wbPassStruct = nullptr,
- unsigned structSize = 0);
+ unsigned structSize = 0);
#ifdef DEBUG
// Print a representation of "vnp" or "vn" on standard output.
@@ -4072,7 +4139,7 @@ public:
void fgUnreachableBlock(BasicBlock* block);
- void fgRemoveJTrue(BasicBlock* block);
+ void fgRemoveConditionalJump(BasicBlock* block);
BasicBlock* fgLastBBInMainFunction();
@@ -4204,6 +4271,7 @@ public:
void fgDebugCheckLinks(bool morphTrees = false);
void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
void fgDebugCheckFlags(GenTreePtr tree);
+ void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags);
#endif
#ifdef LEGACY_BACKEND
@@ -4305,7 +4373,7 @@ protected:
void fgLinkBasicBlocks();
- void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+ unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
void fgCheckBasicBlockControlFlow();
@@ -4380,13 +4448,6 @@ private:
GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr);
GenTree* fgMakeMultiUse(GenTree** ppTree);
- // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr
- // if it happens to be an argument to a call.
- void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild);
-
-public:
- void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg);
-
private:
// Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree);
@@ -4440,16 +4501,11 @@ private:
// for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we
// know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that
// all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
- // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
- // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
- // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
- // representing method call argument lists.)
+ // small; hence the other fields of MorphAddrContext.
enum MorphAddrContextKind
{
MACK_Ind,
MACK_Addr,
- MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the
- // addresses in the arg list.
};
struct MorphAddrContext
{
@@ -4513,7 +4569,7 @@ private:
void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
#if DEBUG
- void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call);
+ void fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call);
static fgWalkPreFn fgFindNonInlineCandidate;
#endif
GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
@@ -4525,16 +4581,14 @@ private:
GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false);
GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest);
GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest);
+ void fgMorphUnsafeBlk(GenTreeObj* obj);
GenTreePtr fgMorphCopyBlock(GenTreePtr tree);
GenTreePtr fgMorphForRegisterFP(GenTreePtr tree);
GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr);
GenTreePtr fgMorphSmpOpPre(GenTreePtr tree);
- GenTreePtr fgMorphDivByConst(GenTreeOp* tree);
- GenTreePtr fgMorphModByConst(GenTreeOp* tree);
GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree);
GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree);
GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare);
- bool fgShouldUseMagicNumberDivide(GenTreeOp* tree);
GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree);
GenTreePtr fgMorphConst(GenTreePtr tree);
@@ -4544,11 +4598,12 @@ public:
private:
#if LOCAL_ASSERTION_PROP
+ void fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree));
void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree));
#endif
void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0));
- GenTreePtr fgMorphStmt;
+ GenTreeStmt* fgMorphStmt;
unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be
// used when morphing big offset.
@@ -4564,7 +4619,6 @@ private:
void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
-#ifdef DEBUGGING_SUPPORT
void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
@@ -4578,8 +4632,6 @@ private:
void fgDispDebugScopes();
#endif // DEBUG
-#endif // DEBUGGING_SUPPORT
-
//-------------------------------------------------------------------------
//
// The following keeps track of any code we've added for things like array
@@ -4622,6 +4674,7 @@ private:
void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result);
void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo);
+ void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmt);
#if FEATURE_MULTIREG_RET
GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
@@ -4905,6 +4958,7 @@ public:
#define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit)
#define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit)
#define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
+#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector<T>.Count (found in lpConstLimit)
#define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
#define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away)
@@ -5205,6 +5259,11 @@ protected:
static const int MIN_CSE_COST = 2;
+    // Keeps track of the CSE indices
+ BitVecTraits* cseTraits;
+ EXPSET_TP cseFull;
+ EXPSET_TP cseEmpty;
+
/* Generic list of nodes - used by the CSE logic */
struct treeLst
@@ -6237,7 +6296,7 @@ public:
BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);
-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(TRACK_LSRA_STATS)
bool IsSuperPMIException(unsigned code)
{
@@ -6334,10 +6393,19 @@ public:
#endif
}
+ inline bool IsTargetAbi(CORINFO_RUNTIME_ABI abi)
+ {
+#if COR_JIT_EE_VERSION > 460
+ return eeGetEEInfo()->targetAbi == abi;
+#else
+ return CORINFO_DESKTOP_ABI == abi;
+#endif
+ }
+
inline bool generateCFIUnwindCodes()
{
-#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI)
- return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI;
+#ifdef UNIX_AMD64_ABI
+ return IsTargetAbi(CORINFO_CORERT_ABI);
#else
return false;
#endif
@@ -6522,8 +6590,6 @@ private:
public:
CodeGenInterface* codeGen;
-#ifdef DEBUGGING_SUPPORT
-
// The following holds information about instr offsets in terms of generated code.
struct IPmappingDsc
@@ -6553,7 +6619,6 @@ public:
typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior>
CallSiteILOffsetTable;
CallSiteILOffsetTable* genCallSite2ILOffsetMap;
-#endif // DEBUGGING_SUPPORT
unsigned genReturnLocal; // Local number for the return value when applicable.
BasicBlock* genReturnBB; // jumped to when not optimizing for speed.
@@ -6588,8 +6653,14 @@ public:
{
return codeGen->doDoubleAlign();
}
- DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc
-#endif // DOUBLE_ALIGN
+ DWORD getCanDoubleAlign();
+ bool shouldDoubleAlign(unsigned refCntStk,
+ unsigned refCntReg,
+ unsigned refCntWtdReg,
+ unsigned refCntStkParam,
+ unsigned refCntWtdStkDbl);
+#endif // DOUBLE_ALIGN
+
__declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
bool getFullPtrRegMap()
{
@@ -6829,6 +6900,11 @@ private:
return InstructionSet_AVX;
}
+ if (CanUseSSE3_4())
+ {
+ return InstructionSet_SSE3_4;
+ }
+
// min bar is SSE2
assert(canUseSSE2());
return InstructionSet_SSE2;
@@ -7072,7 +7148,7 @@ private:
// and small int base type vectors.
SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);
-#endif // defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND)
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
void setLclRelatedToSIMDIntrinsic(GenTreePtr tree);
bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2);
@@ -7261,6 +7337,16 @@ private:
// Returns true if the TYP_SIMD locals on stack are aligned at their
// preferred byte boundary specified by getSIMDTypeAlignment().
+ //
+ // As per the Intel manual, the preferred alignment for AVX vectors is 32-bytes. On Amd64,
+ // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even
+    // RSP/EBP to be 32-byte aligned. It is not clear whether the additional stack space used to
+    // align the stack is worth the benefit, so for now we use 16-byte alignment for AVX
+    // 256-bit vectors with unaligned load/stores to/from memory. On x86, the stack frame
+ // is aligned to 4 bytes. We need to extend existing support for double (8-byte) alignment
+ // to 16 or 32 byte alignment for frames with local SIMD vars, if that is determined to be
+ // profitable.
+ //
bool isSIMDTypeLocalAligned(unsigned varNum)
{
#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
@@ -7270,8 +7356,7 @@ private:
int off = lvaFrameAddress(varNum, &ebpBased);
// TODO-Cleanup: Can't this use the lvExactSize on the varDsc?
int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType);
- bool isAligned = ((off % alignment) == 0);
- noway_assert(isAligned || lvaTable[varNum].lvIsParam);
+ bool isAligned = (alignment <= STACK_ALIGN) && ((off % alignment) == 0);
return isAligned;
}
#endif // FEATURE_SIMD
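A numeric illustration of the relaxed check above; STACK_ALIGN and the alignments correspond to the surrounding comment, while the frame offsets are invented:

    //   TYP_SIMD16 local at frame offset -32, STACK_ALIGN = 16:
    //       16 <= 16 and (-32 % 16) == 0  -> reported as aligned.
    //   TYP_SIMD32 (AVX) local, any offset, STACK_ALIGN = 16:
    //       32 > STACK_ALIGN              -> reported as unaligned, matching the comment above:
    //       256-bit vectors use unaligned load/store instructions rather than forcing a
    //       32-byte-aligned frame.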
@@ -7289,6 +7374,16 @@ private:
#endif
}
+    // Whether SSE3, SSSE3, SSE4.1 and SSE4.2 are available
+ bool CanUseSSE3_4() const
+ {
+#ifdef _TARGET_XARCH_
+ return opts.compCanUseSSE3_4;
+#else
+ return false;
+#endif
+ }
+
bool canUseAVX() const
{
#ifdef FEATURE_AVX_SUPPORT
@@ -7393,21 +7488,21 @@ public:
struct Options
{
- CORJIT_FLAGS* jitFlags; // all flags passed from the EE
- unsigned eeFlags; // CorJitFlag flags passed from the EE
- unsigned compFlags; // method attributes
+ JitFlags* jitFlags; // all flags passed from the EE
+ unsigned compFlags; // method attributes
codeOptimize compCodeOpt; // what type of code optimizations
bool compUseFCOMI;
bool compUseCMOV;
#ifdef _TARGET_XARCH_
- bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+ bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+ bool compCanUseSSE3_4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions
#ifdef FEATURE_AVX_SUPPORT
bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
-#endif
-#endif
+#endif // FEATURE_AVX_SUPPORT
+#endif // _TARGET_XARCH_
// optimize maximally and/or favor speed over size?
@@ -7464,7 +7559,7 @@ public:
#ifdef FEATURE_READYTORUN_COMPILER
inline bool IsReadyToRun()
{
- return (eeFlags & CORJIT_FLG_READYTORUN) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN);
}
#else
inline bool IsReadyToRun()
@@ -7478,7 +7573,7 @@ public:
inline bool ShouldUsePInvokeHelpers()
{
#if COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_USE_PINVOKE_HELPERS);
#else
return false;
#endif
@@ -7489,7 +7584,7 @@ public:
inline bool IsReversePInvoke()
{
#if COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
#else
return false;
#endif
@@ -7499,7 +7594,7 @@ public:
inline bool IsJit32Compat()
{
#if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
#else
return false;
#endif
@@ -7509,7 +7604,7 @@ public:
inline bool IsJit64Compat()
{
#if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
- return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+ return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
#elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
return true;
#else
@@ -7517,14 +7612,10 @@ public:
#endif
}
-#ifdef DEBUGGING_SUPPORT
bool compScopeInfo; // Generate the LocalVar info ?
bool compDbgCode; // Generate debugger-friendly code?
bool compDbgInfo; // Gather debugging info?
bool compDbgEnC;
-#else
- static const bool compDbgCode;
-#endif
#ifdef PROFILING_SUPPORTED
bool compNoPInvokeInlineCB;
@@ -7584,6 +7675,7 @@ public:
bool altJit; // True if we are an altjit and are compiling this method
#ifdef DEBUG
+ bool optRepeat; // Repeat optimizer phases k times
bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
bool dspCode; // Display native code generated
bool dspEHTable; // Display the EH table reported to the VM
@@ -7623,9 +7715,11 @@ public:
// for any call. We have a plan for not needing for stubs though
bool compNeedStackProbes;
- // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub())
- // This options helps one to make JIT behave as if it is under profiler.
+#ifdef PROFILING_SUPPORTED
+ // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub()).
+ // This option helps make the JIT behave as if it is running under a profiler.
bool compJitELTHookEnabled;
+#endif // PROFILING_SUPPORTED
#if FEATURE_TAILCALL_OPT
// Whether opportunistic or implicit tail call optimization is enabled.
@@ -7650,8 +7744,6 @@ public:
#ifdef DEBUG
- static bool s_dspMemStats; // Display per-phase memory statistics for every function
-
template <typename T>
T dspPtr(T p)
{
@@ -7759,8 +7851,8 @@ public:
codeOptimize compCodeOpt()
{
#if 0
- // Switching between size & speed has measurable throughput impact
- // (3.5% on NGen mscorlib when measured). It used to be enabled for
+ // Switching between size & speed has measurable throughput impact
+ // (3.5% on NGen mscorlib when measured). It used to be enabled for
// DEBUG, but should generate identical code between CHK & RET builds,
// so that's not acceptable.
// TODO-Throughput: Figure out what to do about size vs. speed & throughput.
@@ -7772,10 +7864,6 @@ public:
#endif
}
-#ifdef DEBUG
- CLRRandom* inlRNG;
-#endif
-
//--------------------- Info about the procedure --------------------------
struct Info
@@ -7855,8 +7943,6 @@ public:
// and the VM expects that, or the JIT is a "self-host" compiler
// (e.g., x86 hosted targeting x86) and the VM expects that.
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
/* The following holds IL scope information about local variables.
*/
@@ -7871,8 +7957,6 @@ public:
unsigned compStmtOffsetsCount;
ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit;
-#endif // DEBUGGING_SUPPORT || DEBUG
-
#define CPU_X86 0x0100 // The generic X86 CPU
#define CPU_X86_PENTIUM_4 0x0110
@@ -7937,9 +8021,12 @@ public:
// Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg
return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM);
#endif // TARGET_XXX
+
#else // not FEATURE_MULTIREG_RET
+
// For this architecture there are no multireg returns
return false;
+
#endif // FEATURE_MULTIREG_RET
}
@@ -7960,7 +8047,7 @@ public:
void compDispLocalVars();
-#endif // DEBUGGING_SUPPORT || DEBUG
+#endif // DEBUG
//-------------------------- Global Compiler Data ------------------------------------
@@ -8059,19 +8146,22 @@ public:
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags);
+ JitFlags* compileFlags);
void compCompileFinish();
int compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
COMP_HANDLE compHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
CorInfoInstantiationVerification instVerInfo);
ArenaAllocator* compGetAllocator();
#if MEASURE_MEM_ALLOC
+
+ static bool s_dspMemStats; // Display per-phase memory statistics for every function
+
struct MemStats
{
unsigned allocCnt; // # of allocs
@@ -8195,9 +8285,8 @@ public:
void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
#endif // DEBUG
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
-#ifdef DEBUGGING_SUPPORT
typedef ListNode<VarScopeDsc*> VarScopeListNode;
struct VarScopeMapInfo
@@ -8255,8 +8344,6 @@ public:
void compDispScopeLists();
#endif // DEBUG
-#endif // DEBUGGING_SUPPORT
-
bool compIsProfilerHookNeeded();
//-------------------------------------------------------------------------
@@ -8299,7 +8386,7 @@ public:
protected:
size_t compMaxUncheckedOffsetForNullObject;
- void compInitOptions(CORJIT_FLAGS* compileFlags);
+ void compInitOptions(JitFlags* compileFlags);
void compSetProcessor();
void compInitDebuggingInfo();
@@ -8307,16 +8394,22 @@ protected:
#ifdef _TARGET_ARMARCH_
bool compRsvdRegCheck(FrameLayoutState curState);
#endif
- void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags);
+ void compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags);
- // Data required for generating profiler Enter/Leave/TailCall hooks
- CLANG_FORMAT_COMMENT_ANCHOR;
+ // Clear annotations produced during optimizations; to be used between iterations when repeating opts.
+ void ResetOptAnnotations();
+
+ // Regenerate loop descriptors; to be used between iterations when repeating opts.
+ void RecomputeLoopInfo();
#ifdef PROFILING_SUPPORTED
+ // Data required for generating profiler Enter/Leave/TailCall hooks
+
bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
void* compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle
#endif
+
#ifdef _TARGET_AMD64_
bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue
#endif
@@ -8692,6 +8785,18 @@ private:
#endif
inline void EndPhase(Phases phase); // Indicate the end of the given phase.
+#if MEASURE_CLRAPI_CALLS
+ // Thin wrappers that call into JitTimer (if present).
+ inline void CLRApiCallEnter(unsigned apix);
+ inline void CLRApiCallLeave(unsigned apix);
+
+public:
+ inline void CLR_API_Enter(API_ICorJitInfo_Names ename);
+ inline void CLR_API_Leave(API_ICorJitInfo_Names ename);
+
+private:
+#endif
+
#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
// These variables are associated with maintaining SQM data about compile time.
unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index eb8eb19c68..e8358fd2ab 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -473,10 +473,17 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
#endif // !FEATURE_EH_FUNCLETS
-/*****************************************************************************
- *
- * Map a register mask to a register number
- */
+//------------------------------------------------------------------------------
+// genRegNumFromMask : Maps a single register mask to a register number.
+//
+// Arguments:
+// mask - the register mask
+//
+// Return Value:
+// The number of the register contained in the mask.
+//
+// Assumptions:
+// The mask contains one and only one register.
inline regNumber genRegNumFromMask(regMaskTP mask)
{
@@ -768,7 +775,8 @@ inline double getR8LittleEndian(const BYTE* ptr)
/*****************************************************************************
*
- * Return the bitmask to use in the EXPSET_TP for the CSE with the given CSE index.
+ * Return the normalized index to use in the EXPSET_TP for the CSE with
+ * the given CSE index.
* Each GenTree has the following field:
* signed char gtCSEnum; // 0 or the CSE index (negated if def)
* So zero is reserved to mean this node is not a CSE
@@ -777,15 +785,15 @@ inline double getR8LittleEndian(const BYTE* ptr)
* This precondition is checked by the assert on the first line of this method.
*/
-inline EXPSET_TP genCSEnum2bit(unsigned index)
+inline unsigned int genCSEnum2bit(unsigned index)
{
assert((index > 0) && (index <= EXPSET_SZ));
- return ((EXPSET_TP)1 << (index - 1));
+ return (index - 1);
}
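Since genCSEnum2bit now returns a zero-based bit index rather than a mask, callers presumably go through the JIT's bit-vector helpers keyed by the cseTraits member added to Compiler in this change. A hedged sketch, assuming the usual BitVecOps names (AddElemD/IsMember):

//   unsigned bit = genCSEnum2bit(cseIndex);          // 1-based CSE index -> 0-based bit
//   BitVecOps::AddElemD(cseTraits, available, bit);  // formerly: available |= genCSEnum2bit(cseIndex)
//   if (BitVecOps::IsMember(cseTraits, available, bit))
//   {
//       ...
//   }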
#ifdef DEBUG
-const char* genES2str(EXPSET_TP set);
+const char* genES2str(BitVecTraits* traits, EXPSET_TP set);
const char* refCntWtd2str(unsigned refCntWtd);
#endif
@@ -870,6 +878,10 @@ inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode)
#endif
#endif
+#if COUNT_AST_OPERS
+ InterlockedIncrement(&s_gtNodeCounts[oper]);
+#endif
+
#ifdef DEBUG
gtSeqNum = 0;
gtTreeID = JitTls::GetCompiler()->compGenTreeID++;
@@ -1285,11 +1297,11 @@ inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
assert(GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_SMALL ||
GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_LARGE);
- assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE);
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE);
assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || (gtDebugFlags & GTF_DEBUG_NODE_LARGE));
- gtOper = oper;
+ SetOperRaw(oper);
#ifdef DEBUG
// Maintain the invariant that unary operators always have NULL gtOp2.
@@ -1327,6 +1339,9 @@ inline void GenTree::CopyFrom(const GenTree* src, Compiler* comp)
assert((gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[src->gtOper] == TREE_NODE_SZ_SMALL);
GenTreePtr prev = gtPrev;
GenTreePtr next = gtNext;
+
+ RecordOperBashing(OperGet(), src->OperGet()); // nop unless NODEBASH_STATS is enabled
+
// The VTable pointer is copied intentionally here
memcpy((void*)this, (void*)src, src->GetNodeSize());
this->gtPrev = prev;
@@ -1373,7 +1388,7 @@ inline void GenTree::InitNodeSize()
inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
{
- gtOper = oper;
+ SetOperRaw(oper);
if (vnUpdate == CLEAR_VN)
{
@@ -1384,6 +1399,7 @@ inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
inline void GenTree::CopyFrom(GenTreePtr src)
{
+ RecordOperBashing(OperGet(), src->OperGet()); // nop unless NODEBASH_STATS is enabled
*this = *src;
#ifdef DEBUG
gtSeqNum = 0;
@@ -1405,6 +1421,16 @@ inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_ty
#endif // SMALL_TREE_NODES
/*****************************************************************************/
+/*****************************************************************************/
+
+inline void GenTree::SetOperRaw(genTreeOps oper)
+{
+ // Please do not do anything here other than assign to gtOper (debug-only
+ // code is OK, but should be kept to a minimum).
+ RecordOperBashing(OperGet(), oper); // nop unless NODEBASH_STATS is enabled
+ gtOper = oper;
+}
+
inline void GenTree::SetOperResetFlags(genTreeOps oper)
{
SetOper(oper);
@@ -1446,7 +1472,7 @@ inline void GenTree::ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
inline void GenTree::ChangeOperUnchecked(genTreeOps oper)
{
- gtOper = oper; // Trust the caller and don't use SetOper()
+ SetOperRaw(oper); // Trust the caller and don't use SetOper()
gtFlags &= GTF_COMMON_MASK;
}
@@ -1579,7 +1605,7 @@ inline unsigned Compiler::lvaGrabTemp(bool shortLifetime DEBUGARG(const char* re
#if 0
// TODO-Cleanup: Enable this and test.
-#ifdef DEBUG
+#ifdef DEBUG
// Fill the old table with junks. So to detect the un-intended use.
memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
#endif
@@ -1655,7 +1681,7 @@ inline unsigned Compiler::lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason)
}
#if 0
-#ifdef DEBUG
+#ifdef DEBUG
// TODO-Cleanup: Enable this and test.
// Fill the old table with junks. So to detect the un-intended use.
memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
@@ -3909,7 +3935,7 @@ inline bool Compiler::IsSharedStaticHelper(GenTreePtr tree)
helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS ||
helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS ||
#ifdef FEATURE_READYTORUN_COMPILER
- helper == CORINFO_HELP_READYTORUN_STATIC_BASE ||
+ helper == CORINFO_HELP_READYTORUN_STATIC_BASE || helper == CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE ||
#endif
helper == CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS;
#if 0
@@ -3944,7 +3970,7 @@ inline bool jitStaticFldIsGlobAddr(CORINFO_FIELD_HANDLE fldHnd)
return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS);
}
-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(FEATURE_TRACELOGGING)
inline bool Compiler::eeIsNativeMethod(CORINFO_METHOD_HANDLE method)
{
@@ -4087,16 +4113,12 @@ inline bool Compiler::compIsProfilerHookNeeded()
{
#ifdef PROFILING_SUPPORTED
return compProfilerHookNeeded
-
-#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
// IL stubs are excluded by VM and we need to do the same even running
// under a complus env hook to generate profiler hooks
- || (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
-#endif
- ;
-#else // PROFILING_SUPPORTED
+ || (opts.compJitELTHookEnabled && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB));
+#else // !PROFILING_SUPPORTED
return false;
-#endif
+#endif // !PROFILING_SUPPORTED
}
/*****************************************************************************
@@ -4185,7 +4207,7 @@ inline bool Compiler::impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart,
inline bool Compiler::compIsForImportOnly()
{
- return ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0);
+ return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY);
}
/*****************************************************************************
@@ -4352,10 +4374,12 @@ inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc)
{
if (varDsc->lvTracked && (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF))
{
+        // Stack parameters are always untracked w.r.t. GC reporting
+ const bool isStackParam = varDsc->lvIsParam && !varDsc->lvIsRegArg;
#ifdef _TARGET_AMD64_
- return !lvaIsFieldOfDependentlyPromotedStruct(varDsc);
+ return !isStackParam && !lvaIsFieldOfDependentlyPromotedStruct(varDsc);
#else // !_TARGET_AMD64_
- return true;
+ return !isStackParam;
#endif // !_TARGET_AMD64_
}
else
@@ -4367,8 +4391,10 @@ inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc)
inline void Compiler::EndPhase(Phases phase)
{
#if defined(FEATURE_JIT_METHOD_PERF)
- if (pCompJitTimer != NULL)
+ if (pCompJitTimer != nullptr)
+ {
pCompJitTimer->EndPhase(phase);
+ }
#endif
#if DUMP_FLOWGRAPHS
fgDumpFlowGraph(phase);
@@ -4405,6 +4431,36 @@ inline void Compiler::EndPhase(Phases phase)
}
/*****************************************************************************/
+#if MEASURE_CLRAPI_CALLS
+
+inline void Compiler::CLRApiCallEnter(unsigned apix)
+{
+ if (pCompJitTimer != nullptr)
+ {
+ pCompJitTimer->CLRApiCallEnter(apix);
+ }
+}
+inline void Compiler::CLRApiCallLeave(unsigned apix)
+{
+ if (pCompJitTimer != nullptr)
+ {
+ pCompJitTimer->CLRApiCallLeave(apix);
+ }
+}
+
+inline void Compiler::CLR_API_Enter(API_ICorJitInfo_Names ename)
+{
+ CLRApiCallEnter(ename);
+}
+
+inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename)
+{
+ CLRApiCallLeave(ename);
+}
+
+#endif // MEASURE_CLRAPI_CALLS
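These thin wrappers are presumably what an ICorJitInfo wrapper such as the WrapICorJitInfo type forward-declared in this change calls around each EE method. A hedged sketch; the wrapper class, its members, and the chosen method are illustrative, not the actual generated code:

    //   DWORD WrapICorJitInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn)
    //   {
    //       pCompiler->CLR_API_Enter(API_getMethodAttribs);
    //       DWORD result = wrapHnd->getMethodAttribs(ftn);
    //       pCompiler->CLR_API_Leave(API_getMethodAttribs);
    //       return result;
    //   }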
+
+/*****************************************************************************/
bool Compiler::fgExcludeFromSsa(unsigned lclNum)
{
if (opts.MinOpts())
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
index f193d04647..ac1bb636ff 100644
--- a/src/jit/compphases.h
+++ b/src/jit/compphases.h
@@ -22,7 +22,12 @@
CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
-CompPhaseNameMacro(PHASE_MORPH, "Morph", "MORPH", false, -1)
+CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1)
+CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1)
+CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1)
+CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1)
+CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1)
+CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1)
CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1)
CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1)
CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1)
@@ -55,7 +60,7 @@ CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks",
#if FEATURE_VALNUM_CSE
CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1)
-#endif
+#endif
CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1)
#if ASSERTION_PROP
@@ -86,6 +91,12 @@ CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve",
CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+
+#if MEASURE_CLRAPI_CALLS
+// The following is a "pseudo-phase" - it aggregates timing info
+// for calls through ICorJitInfo across all "real" phases.
+CompPhaseNameMacro(PHASE_CLR_API, "CLR API calls", "CLR-API", false, -1)
+#endif
// clang-format on
#undef CompPhaseNameMacro
diff --git a/src/jit/crossgen/CMakeLists.txt b/src/jit/crossgen/CMakeLists.txt
index f79d9e72ce..6440e91a04 100644
--- a/src/jit/crossgen/CMakeLists.txt
+++ b/src/jit/crossgen/CMakeLists.txt
@@ -1,7 +1,7 @@
include(${CLR_DIR}/crossgen.cmake)
-if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+if(CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DLEGACY_BACKEND)
endif()
-add_library_clr(${JIT_BASE_NAME}_crossgen ${SOURCES})
+add_library_clr(clrjit_crossgen ${SOURCES})
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index cf66487367..98b8b081fc 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -65,7 +65,7 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block)
assert(block->isEmpty() || block->IsLIR());
m_blockWeight = block->getBBWeight(m_compiler);
- m_range = &LIR::AsRange(block);
+ m_range = &LIR::AsRange(block);
DecomposeRangeHelper();
}
@@ -90,7 +90,7 @@ void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LI
DecomposeLongs decomposer(compiler);
decomposer.m_blockWeight = blockWeight;
- decomposer.m_range = &range;
+ decomposer.m_range = &range;
decomposer.DecomposeRangeHelper();
}
@@ -111,13 +111,7 @@ void DecomposeLongs::DecomposeRangeHelper()
GenTree* node = Range().FirstNonPhiNode();
while (node != nullptr)
{
- LIR::Use use;
- if (!Range().TryGetUse(node, &use))
- {
- use = LIR::Use::GetDummyUse(Range(), node);
- }
-
- node = DecomposeNode(use);
+ node = DecomposeNode(node);
}
assert(Range().CheckLIR(m_compiler));
@@ -132,10 +126,8 @@ void DecomposeLongs::DecomposeRangeHelper()
// Return Value:
// The next node to process.
//
-GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
+GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
{
- GenTree* tree = use.Def();
-
// Handle the case where we are implicitly using the lower half of a long lclVar.
if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal())
{
@@ -171,14 +163,15 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
}
#endif // DEBUG
+ LIR::Use use;
+ if (!Range().TryGetUse(tree, &use))
+ {
+ use = LIR::Use::GetDummyUse(Range(), tree);
+ }
+
GenTree* nextNode = nullptr;
switch (tree->OperGet())
{
- case GT_PHI:
- case GT_PHI_ARG:
- nextNode = tree->gtNext;
- break;
-
case GT_LCL_VAR:
nextNode = DecomposeLclVar(use);
break;
@@ -212,8 +205,7 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
break;
case GT_STORE_LCL_FLD:
- assert(tree->gtOp.gtOp1->OperGet() == GT_LONG);
- NYI("st.lclFld of of TYP_LONG");
+ nextNode = DecomposeStoreLclFld(use);
break;
case GT_IND:
@@ -239,23 +231,11 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
break;
case GT_MUL:
- NYI("Arithmetic binary operators on TYP_LONG - GT_MUL");
- break;
-
- case GT_DIV:
- NYI("Arithmetic binary operators on TYP_LONG - GT_DIV");
- break;
-
- case GT_MOD:
- NYI("Arithmetic binary operators on TYP_LONG - GT_MOD");
- break;
-
- case GT_UDIV:
- NYI("Arithmetic binary operators on TYP_LONG - GT_UDIV");
+ nextNode = DecomposeMul(use);
break;
case GT_UMOD:
- NYI("Arithmetic binary operators on TYP_LONG - GT_UMOD");
+ nextNode = DecomposeUMod(use);
break;
case GT_LSH:
@@ -266,11 +246,7 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
case GT_ROL:
case GT_ROR:
- NYI("Arithmetic binary operators on TYP_LONG - ROTATE");
- break;
-
- case GT_MULHI:
- NYI("Arithmetic binary operators on TYP_LONG - MULHI");
+ nextNode = DecomposeRotate(use);
break;
case GT_LOCKADD:
@@ -288,6 +264,37 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
}
}
+ // If we replaced the argument to a GT_FIELD_LIST element with a GT_LONG node, split that field list
+ // element into two elements: one for each half of the GT_LONG.
+ if ((use.Def()->OperGet() == GT_LONG) && !use.IsDummyUse() && (use.User()->OperGet() == GT_FIELD_LIST))
+ {
+ GenTreeOp* value = use.Def()->AsOp();
+ Range().Remove(value);
+
+ // The node returned by `use.User()` is the head of the field list. We need to find the actual node that uses
+ // the `GT_LONG` so that we can split it.
+ GenTreeFieldList* listNode = use.User()->AsFieldList();
+ for (; listNode != nullptr; listNode = listNode->Rest())
+ {
+ if (listNode->Current() == value)
+ {
+ break;
+ }
+ }
+
+ assert(listNode != nullptr);
+ GenTree* rest = listNode->gtOp2;
+
+ GenTreeFieldList* loNode = listNode;
+ loNode->gtOp1 = value->gtOp1;
+ loNode->gtFieldType = TYP_INT;
+
+ GenTreeFieldList* hiNode =
+ new (m_compiler, GT_FIELD_LIST) GenTreeFieldList(value->gtOp2, loNode->gtFieldOffset + 4, TYP_INT, loNode);
+
+ hiNode->gtOp2 = rest;
+ }
+
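A before/after picture of the field-list split performed above; the offsets assume the decomposed element started at field offset 0:

    //   before:  FIELD_LIST [ GT_LONG @ +0 (TYP_LONG), rest... ]
    //   after:   FIELD_LIST [ loHalf  @ +0 (TYP_INT), hiHalf @ +4 (TYP_INT), rest... ]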
#ifdef DEBUG
if (m_compiler->verbose)
{
@@ -308,23 +315,25 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
// Arguments:
// use - the LIR::Use object for the def that needs to be decomposed.
// loResult - the decomposed low part
-// hiResult - the decomposed high part. This must follow loResult in the linear order,
-// as the new GT_LONG node will be inserted immediately after it.
+// hiResult - the decomposed high part
+// insertResultAfter - the node that the GT_LONG should be inserted after
//
// Return Value:
// The next node to process.
//
-GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult)
+GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use,
+ GenTree* loResult,
+ GenTree* hiResult,
+ GenTree* insertResultAfter)
{
assert(use.IsInitialized());
assert(loResult != nullptr);
assert(hiResult != nullptr);
assert(Range().Contains(loResult));
assert(Range().Contains(hiResult));
- assert(loResult->Precedes(hiResult));
GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult);
- Range().InsertAfter(hiResult, gtLong);
+ Range().InsertAfter(insertResultAfter, gtLong);
use.ReplaceWith(m_compiler, gtLong);
@@ -366,8 +375,6 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
}
else
{
- noway_assert(varDsc->lvLRACandidate == false);
-
loResult->SetOper(GT_LCL_FLD);
loResult->AsLclFld()->gtLclOffs = 0;
loResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
@@ -380,7 +387,7 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
m_compiler->lvaIncRefCnts(loResult);
m_compiler->lvaIncRefCnts(hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -404,7 +411,7 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -423,59 +430,118 @@ GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use)
GenTree* tree = use.Def();
GenTree* rhs = tree->gtGetOp1();
- if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL))
+ if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL) ||
+ ((rhs->OperGet() == GT_MUL_LONG) && (rhs->gtFlags & GTF_MUL_64RSLT) != 0))
{
// GT_CALLs are not decomposed, so will not be converted to GT_LONG
// GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal
+ // GT_MULs are not decomposed, so will not be converted to GT_LONG
return tree->gtNext;
}
noway_assert(rhs->OperGet() == GT_LONG);
+
unsigned varNum = tree->AsLclVarCommon()->gtLclNum;
LclVarDsc* varDsc = m_compiler->lvaTable + varNum;
+ if (!varDsc->lvPromoted)
+ {
+ // We cannot decompose a st.lclVar that is not promoted because doing so
+ // changes its liveness semantics. For example, consider the following
+ // decomposition of a st.lclVar into two st.lclFlds:
+ //
+ // Before:
+ //
+ // /--* t0 int
+ // +--* t1 int
+ // t2 = * gt_long long
+ //
+ // /--* t2 long
+ // * st.lclVar long V0
+ //
+ // After:
+ // /--* t0 int
+ // * st.lclFld int V0 [+0]
+ //
+ // /--* t1 int
+ // * st.lclFld int V0 [+4]
+ //
+ // Before decomposition, the `st.lclVar` is a simple def of `V0`. After
+ // decomposition, each `st.lclFld` is a partial def of `V0`. This partial
+ // def is treated as both a use and a def of the appropriate lclVar. This
+ // difference will affect any situation in which the liveness of a variable
+ // at a def matters (e.g. dead store elimination, live-in sets, etc.). As
+ // a result, we leave these stores as-is and generate the decomposed store
+ // in the code generator.
+ //
+ // NOTE: this does extend the lifetime of the low half of the `GT_LONG`
+ // node as compared to the decomposed form. If we start doing more code
+ // motion in the backend, this may cause some CQ issues and some sort of
+ // decomposition could be beneficial.
+ return tree->gtNext;
+ }
+
+ assert(varDsc->lvFieldCnt == 2);
m_compiler->lvaDecRefCnts(tree);
- GenTree* loRhs = rhs->gtGetOp1();
- GenTree* hiRhs = rhs->gtGetOp2();
- GenTree* hiStore = m_compiler->gtNewLclLNode(varNum, TYP_INT);
+ GenTreeOp* value = rhs->AsOp();
+ Range().Remove(value);
- if (varDsc->lvPromoted)
- {
- assert(varDsc->lvFieldCnt == 2);
+ const unsigned loVarNum = varDsc->lvFieldLclStart;
+ GenTree* loStore = tree;
+ loStore->AsLclVarCommon()->SetLclNum(loVarNum);
+ loStore->gtOp.gtOp1 = value->gtOp1;
+ loStore->gtType = TYP_INT;
- unsigned loVarNum = varDsc->lvFieldLclStart;
- unsigned hiVarNum = loVarNum + 1;
- tree->AsLclVarCommon()->SetLclNum(loVarNum);
- hiStore->SetOper(GT_STORE_LCL_VAR);
- hiStore->AsLclVarCommon()->SetLclNum(hiVarNum);
- }
- else
- {
- noway_assert(varDsc->lvLRACandidate == false);
+ const unsigned hiVarNum = loVarNum + 1;
+ GenTree* hiStore = m_compiler->gtNewLclLNode(hiVarNum, TYP_INT);
+ hiStore->SetOper(GT_STORE_LCL_VAR);
+ hiStore->gtOp.gtOp1 = value->gtOp2;
+ hiStore->gtFlags |= GTF_VAR_DEF;
- tree->SetOper(GT_STORE_LCL_FLD);
- tree->AsLclFld()->gtLclOffs = 0;
- tree->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ m_compiler->lvaIncRefCnts(loStore);
+ m_compiler->lvaIncRefCnts(hiStore);
- hiStore->SetOper(GT_STORE_LCL_FLD);
- hiStore->AsLclFld()->gtLclOffs = 4;
- hiStore->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
- }
+ Range().InsertAfter(tree, hiStore);
- // 'tree' is going to steal the loRhs node for itself, so we need to remove the
- // GT_LONG node from the threading.
- Range().Remove(rhs);
+ return hiStore->gtNext;
+}
- tree->gtOp.gtOp1 = loRhs;
- tree->gtType = TYP_INT;
+//------------------------------------------------------------------------
+// DecomposeStoreLclFld: Decompose GT_STORE_LCL_FLD.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_STORE_LCL_FLD);
- hiStore->gtOp.gtOp1 = hiRhs;
- hiStore->gtFlags |= GTF_VAR_DEF;
+ GenTreeLclFld* store = use.Def()->AsLclFld();
+
+ GenTreeOp* value = store->gtOp1->AsOp();
+ assert(value->OperGet() == GT_LONG);
+ Range().Remove(value);
+
+ // The original store node will be repurposed to store the low half of the GT_LONG.
+ GenTreeLclFld* loStore = store;
+ loStore->gtOp1 = value->gtOp1;
+ loStore->gtType = TYP_INT;
+ loStore->gtFlags |= GTF_VAR_USEASG;
- m_compiler->lvaIncRefCnts(tree);
+ // Create the store for the upper half of the GT_LONG and insert it after the low store.
+ GenTreeLclFld* hiStore = m_compiler->gtNewLclFldNode(loStore->gtLclNum, TYP_INT, loStore->gtLclOffs + 4);
+ hiStore->SetOper(GT_STORE_LCL_FLD);
+ hiStore->gtOp1 = value->gtOp2;
+ hiStore->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
+
+ // Bump the ref count for the destination.
m_compiler->lvaIncRefCnts(hiStore);
- Range().InsertAfter(tree, hiStore);
+ Range().InsertAfter(loStore, hiStore);
return hiStore->gtNext;
}
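For reference, the promoted st.lclVar path above redirects the original store to the first promoted field local and adds a second store to the adjacent field, while the st.lclFld path stores the two halves at offsets +0 and +4. On a little-endian target the combined memory effect is just the following (a minimal plain-C++ sketch; the helper name is illustrative and not part of the JIT):

    #include <cstdint>
    #include <cstring>

    // Store a 64-bit value into a local's stack slot as two 32-bit halves,
    // mirroring the loStore (+0) / hiStore (+4) pair built above.
    void StoreLongAsTwoInts(uint8_t* slot, uint64_t value)
    {
        uint32_t lo = (uint32_t)value;         // low half  -> [slot + 0]
        uint32_t hi = (uint32_t)(value >> 32); // high half -> [slot + 4]
        std::memcpy(slot + 0, &lo, sizeof(lo));
        std::memcpy(slot + 4, &hi, sizeof(hi));
    }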
@@ -494,35 +560,103 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
assert(use.IsInitialized());
assert(use.Def()->OperGet() == GT_CAST);
- GenTree* tree = use.Def();
+ GenTree* cast = use.Def()->AsCast();
GenTree* loResult = nullptr;
GenTree* hiResult = nullptr;
- assert(tree->gtPrev == tree->gtGetOp1());
- NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow");
- switch (tree->AsCast()->CastFromType())
+ var_types srcType = cast->CastFromType();
+ var_types dstType = cast->CastToType();
+
+ if ((cast->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ if (varTypeIsLong(srcType))
+ {
+ if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType)))
+ {
+ GenTree* srcOp = cast->gtGetOp1();
+ noway_assert(srcOp->OperGet() == GT_LONG);
+ GenTree* loSrcOp = srcOp->gtGetOp1();
+ GenTree* hiSrcOp = srcOp->gtGetOp2();
+
+ //
+ // When casting between long types an overflow check is needed only if the types
+ // have different signedness. In both cases (long->ulong and ulong->long) we only
+ // need to check if the high part is negative or not. Use the existing cast node
+ // to perform an int->uint cast of the high part to take advantage of the overflow
+ // check provided by codegen.
+ //
+
+ loResult = loSrcOp;
+
+ hiResult = cast;
+ hiResult->gtType = TYP_INT;
+ hiResult->AsCast()->gtCastType = TYP_UINT;
+ hiResult->gtFlags &= ~GTF_UNSIGNED;
+ hiResult->gtOp.gtOp1 = hiSrcOp;
+
+ Range().Remove(cast);
+ Range().Remove(srcOp);
+ Range().InsertAfter(hiSrcOp, hiResult);
+ }
+ else
+ {
+ NYI("Unimplemented long->long no-op cast decomposition");
+ }
+ }
+ else if (varTypeIsIntegralOrI(srcType))
{
- case TYP_INT:
- if (tree->gtFlags & GTF_UNSIGNED)
+ if (cast->gtOverflow() && !varTypeIsUnsigned(srcType) && varTypeIsUnsigned(dstType))
+ {
+ //
+ // An overflow check is needed only when casting from a signed type to ulong.
+ // Change the cast type to uint to take advantage of the overflow check provided
+ // by codegen and then zero extend the resulting uint to ulong.
+ //
+
+ loResult = cast;
+ loResult->AsCast()->gtCastType = TYP_UINT;
+ loResult->gtType = TYP_INT;
+
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+
+ Range().InsertAfter(loResult, hiResult);
+ }
+ else
+ {
+ if (varTypeIsUnsigned(srcType))
{
- loResult = tree->gtGetOp1();
- Range().Remove(tree);
+ loResult = cast->gtGetOp1();
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
- hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
+ Range().Remove(cast);
Range().InsertAfter(loResult, hiResult);
}
else
{
- NYI("Lowering of signed cast TYP_INT->TYP_LONG");
- }
- break;
+ LIR::Use src(Range(), &(cast->gtOp.gtOp1), cast);
+ unsigned lclNum = src.ReplaceWithLclVar(m_compiler, m_blockWeight);
- default:
- NYI("Unimplemented type for Lowering of cast to TYP_LONG");
- break;
+ loResult = src.Def();
+
+ GenTree* loCopy = m_compiler->gtNewLclvNode(lclNum, TYP_INT);
+ GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, loCopy, shiftBy);
+
+ Range().Remove(cast);
+ Range().InsertAfter(loResult, loCopy, shiftBy, hiResult);
+ m_compiler->lvaIncRefCnts(loCopy);
+ }
+ }
+ }
+ else
+ {
+ NYI("Unimplemented cast decomposition");
}
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
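As a reference for the int->long cases above: the unsigned path zero-fills the high half, the signed path materializes it as the low half arithmetically shifted right by 31, and the long<->ulong overflow check reduces to asking whether the high part is negative. A minimal sketch of those invariants, assuming two's-complement arithmetic (helper names illustrative, not part of the JIT):

    #include <cstdint>

    // Decomposed int -> long cast: lo is the source value, hi is a sign fill or zero.
    void CastIntToLongHalves(int32_t src, bool isUnsigned, uint32_t* lo, uint32_t* hi)
    {
        *lo = (uint32_t)src;
        *hi = isUnsigned ? 0u : (uint32_t)(src >> 31); // GT_RSH by 31 replicates the sign bit
    }

    // long <-> ulong cast: overflows exactly when the high half is negative as an int.
    bool LongSignednessCastOverflows(uint32_t hi)
    {
        return (int32_t)hi < 0;
    }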
//------------------------------------------------------------------------
@@ -549,7 +683,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
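The constant case above is the simplest split: the loResult/hiResult constants are just the two 32-bit halves of the 64-bit literal. A tiny sketch (function name illustrative):

    #include <cstdint>

    // Split a 64-bit constant into the two TYP_INT constants that feed GT_LONG(lo, hi).
    void SplitLongConstant(int64_t cnsVal, int32_t* loVal, int32_t* hiVal)
    {
        *loVal = (int32_t)(cnsVal & 0xFFFFFFFF);
        *hiVal = (int32_t)(cnsVal >> 32);
    }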
//------------------------------------------------------------------------
@@ -567,35 +701,7 @@ GenTree* DecomposeLongs::DecomposeCall(LIR::Use& use)
assert(use.Def()->OperGet() == GT_CALL);
// We only need to force var = call() if the call's result is used.
- if (use.IsDummyUse())
- return use.Def()->gtNext;
-
- GenTree* user = use.User();
- if (user->OperGet() == GT_STORE_LCL_VAR)
- {
- // If parent is already a STORE_LCL_VAR, we can skip it if
- // it is already marked as lvIsMultiRegRet.
- unsigned varNum = user->AsLclVarCommon()->gtLclNum;
- if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
- {
- return use.Def()->gtNext;
- }
- else if (!m_compiler->lvaTable[varNum].lvPromoted)
- {
- // If var wasn't promoted, we can just set lvIsMultiRegRet.
- m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
- return use.Def()->gtNext;
- }
- }
-
- GenTree* originalNode = use.Def();
-
- // Otherwise, we need to force var = call()
- unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
- m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
-
- // Decompose the new LclVar use
- return DecomposeLclVar(use);
+ return StoreNodeToVar(use);
}
//------------------------------------------------------------------------
@@ -627,7 +733,7 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
// + --* t155 long
// * storeIndir long
- GenTree* gtLong = tree->gtOp.gtOp2;
+ GenTree* gtLong = tree->gtOp.gtOp2;
// Save address to a temp. It is used in storeIndLow and storeIndHigh trees.
LIR::Use address(Range(), &tree->gtOp.gtOp1, tree);
@@ -721,12 +827,13 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
GenTreePtr addrHigh =
new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
GenTreePtr indHigh = new (m_compiler, GT_IND) GenTreeIndir(GT_IND, TYP_INT, addrHigh, nullptr);
+ indHigh->gtFlags |= (indLow->gtFlags & (GTF_GLOB_REF | GTF_EXCEPT | GTF_IND_FLAGS));
m_compiler->lvaIncRefCnts(addrBaseHigh);
Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
- return FinalizeDecomposition(use, indLow, indHigh);
+ return FinalizeDecomposition(use, indLow, indHigh, indHigh);
}
//------------------------------------------------------------------------
@@ -758,7 +865,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
Range().InsertAfter(loResult, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
@@ -779,14 +886,6 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
GenTree* gtLong = tree->gtGetOp1();
noway_assert(gtLong->OperGet() == GT_LONG);
- LIR::Use op1(Range(), &gtLong->gtOp.gtOp1, gtLong);
- op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
-
- LIR::Use op2(Range(), &gtLong->gtOp.gtOp2, gtLong);
- op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
-
- // Neither GT_NEG nor the introduced temporaries have side effects.
- tree->gtFlags &= ~GTF_ALL_EFFECT;
GenTree* loOp1 = gtLong->gtGetOp1();
GenTree* hiOp1 = gtLong->gtGetOp2();
@@ -799,11 +898,10 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT);
GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero);
GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust);
- hiResult->gtFlags = tree->gtFlags;
Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
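The negate sequence above is the usual two-register negation: negate the low half, then negate the high half adjusted by the carry out of the low negation (GT_ADD_HI models the adc). A small sketch of the arithmetic it relies on, with 32-bit wraparound (helper name illustrative):

    #include <cstdint>

    // Two-register negation of a 64-bit value held as (lo, hi):
    //   lo' = -lo
    //   hi' = -(hi + carry), where carry is 1 iff lo != 0 (the carry produced by 'neg lo')
    void NegateLongHalves(uint32_t lo, uint32_t hi, uint32_t* loOut, uint32_t* hiOut)
    {
        uint32_t carry = (lo != 0) ? 1u : 0u;
        *loOut = 0u - lo;
        *hiOut = 0u - (hi + carry);
    }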
//------------------------------------------------------------------------
@@ -864,14 +962,19 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
}
}
- return FinalizeDecomposition(use, loResult, hiResult);
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
//------------------------------------------------------------------------
-// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
-// the shift helper functions, so we here convert the shift into a helper call by
-// pulling its arguments out of linear order and making them the args to a call, then
-// replacing the original node with the new call.
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shifts by a constant int, we
+// can inspect the shift amount and decompose to the appropriate node types, generating
+// a shl/shld pattern for GT_LSH, a shrd/shr pattern for GT_RSZ, and a shrd/sar pattern
+// for GT_RSH for most shift amounts. Shifts by 0, >= 32, and >= 64 are special-cased
+// to produce better code patterns.
+//
+// For all other shift nodes, we need to use the shift helper functions, so we here convert
+// the shift into a helper call by pulling its arguments out of linear order and making
+// them the args to a call, then replacing the original node with the new call.
//
// Arguments:
// use - the LIR::Use object for the def that needs to be decomposed.
@@ -883,66 +986,646 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
assert(use.IsInitialized());
- GenTree* tree = use.Def();
- GenTree* gtLong = tree->gtGetOp1();
- genTreeOps oper = tree->OperGet();
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+ GenTree* shiftByOp = tree->gtGetOp2();
+
+ genTreeOps oper = tree->OperGet();
+ genTreeOps shiftByOper = shiftByOp->OperGet();
assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
- LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
- loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ // If we are shifting by a constant int, we do not want to use a helper; instead, we decompose.
+ if (shiftByOper == GT_CNS_INT)
+ {
+ unsigned int count = shiftByOp->gtIntCon.gtIconVal;
+ Range().Remove(shiftByOp);
- LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
- hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ if (count == 0)
+ {
+ GenTree* next = tree->gtNext;
+ // Remove tree and don't do anything else.
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, gtLong);
+ return next;
+ }
- LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
- shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ GenTree* loResult;
+ GenTree* hiResult;
- GenTree* loOp1 = gtLong->gtGetOp1();
- GenTree* hiOp1 = gtLong->gtGetOp2();
+ GenTree* insertAfter;
- GenTree* shiftWidthOp = tree->gtGetOp2();
+ switch (oper)
+ {
+ case GT_LSH:
+ {
+ Range().Remove(hiOp1);
+ if (count < 32)
+ {
+ // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce:
+ // reg1 = lo
+ // shl lo, shift
+ // shld hi, reg1, shift
+
+ Range().Remove(gtLong);
+ loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
+ unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum;
+ Range().Remove(loOp1);
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+ loResult = m_compiler->gtNewOperNode(GT_LSH, TYP_INT, loOp1, shiftByLo);
+
+ // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+ // generate the shld instruction
+ GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+ GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+ hiResult = m_compiler->gtNewOperNode(GT_LSH_HI, TYP_INT, hiOp, shiftByHi);
+
+ m_compiler->lvaIncRefCnts(loCopy);
+
+ Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+ Range().InsertBefore(tree, shiftByHi, hiResult);
+ Range().InsertBefore(tree, loOp1, shiftByLo, loResult);
+
+ insertAfter = loResult;
+ }
+ else
+ {
+ assert(count >= 32);
+
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move loOp1 into hiResult (shift of 32 bits is just a mov of lo to hi)
+ // We need to make sure that we save lo to a temp variable so that we don't overwrite lo
+ // before saving it to hi in the case that we are doing an in-place shift. I.e.:
+ // x = x << 32
+
+ LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ hiResult = loOp1Use.Def();
+ Range().Remove(gtLong);
+ }
+ else
+ {
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+ assert(count > 32 && count < 64);
+
+ // Move loOp1 into hiResult, do a GT_LSH with count - 32.
+ // We will compute hiResult before loResult in this case, so we don't need to store lo to a
+ // temp
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy);
+ Range().InsertBefore(tree, loOp1, shiftBy, hiResult);
+ }
+ }
+ else
+ {
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+ assert(count >= 64);
+
+ // Zero out hi (shift of >= 64 bits moves all the bits out of the two registers)
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, hiResult);
+ }
+
+ // Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult)
+ loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, loResult);
+
+ insertAfter = loResult;
+ }
+ }
+ break;
+ case GT_RSZ:
+ {
+ Range().Remove(gtLong);
+
+ if (count < 32)
+ {
+ // Hi is a GT_RSZ, lo is a GT_RSH_LO. Will produce:
+ // reg1 = hi
+ // shrd lo, reg1, shift
+ // shr hi, shift
+
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
+ unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ hiResult = m_compiler->gtNewOperNode(GT_RSZ, TYP_INT, hiOp1, shiftByHi);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shrd instruction
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+ loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+ Range().InsertBefore(tree, hiCopy, loOp);
+ Range().InsertBefore(tree, shiftByLo, loResult);
+ Range().InsertBefore(tree, shiftByHi, hiResult);
+ }
+ else
+ {
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
+ assert(count >= 32);
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move hiOp1 into loResult.
+ loResult = hiOp1;
+ Range().InsertBefore(tree, loResult);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+
+ // Move hiOp1 into loResult, do a GT_RSZ with count - 32.
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ }
+ }
+ else
+ {
+ assert(count >= 64);
+
+ // Zero out lo
+ loResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, loResult);
+ }
+
+ // Zero out hi
+ hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertBefore(tree, hiResult);
+ }
+
+ insertAfter = hiResult;
+ }
+ break;
+ case GT_RSH:
+ {
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
+ unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ Range().Remove(hiOp1);
+
+ if (count < 32)
+ {
+ // Hi is a GT_RSH, lo is a GT_RSH_LO. Will produce:
+ // reg1 = hi
+ // shrd lo, reg1, shift
+ // sar hi, shift
+
+ GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftByHi);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shrd instruction
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
+ loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
+
+ Range().InsertBefore(tree, loOp1, hiCopy, loOp);
+ Range().InsertBefore(tree, shiftByLo, loResult);
+ Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult);
+ }
+ else
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+ if (count == 32)
+ {
+ // Move hiOp1 into loResult.
+ loResult = hiOp1;
+ Range().InsertBefore(tree, loResult);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+
+ // Move hiOp1 into loResult, do a GT_RSH with count - 32.
+ GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ }
+
+ // Propagate sign bit in hiResult
+ GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy);
+ Range().InsertBefore(tree, shiftBy, hiCopy, hiResult);
+
+ m_compiler->lvaIncRefCnts(hiCopy);
+ }
+ else
+ {
+ assert(count >= 64);
+
+ // Propagate sign bit in loResult
+ GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ loResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy);
+ Range().InsertBefore(tree, hiCopy, loShiftBy, loResult);
+
+ // Propagate sign bit in hiResult
+ GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
+ hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy);
+ Range().InsertBefore(tree, shiftBy, hiOp1, hiResult);
+
+ m_compiler->lvaIncRefCnts(hiCopy);
+ }
+ }
+
+ insertAfter = hiResult;
+ }
+ break;
+ default:
+ unreached();
+ }
- Range().Remove(gtLong);
- Range().Remove(loOp1);
- Range().Remove(hiOp1);
+ // Remove tree from Range
+ Range().Remove(tree);
- Range().Remove(shiftWidthOp);
+ return FinalizeDecomposition(use, loResult, hiResult, insertAfter);
+ }
+ else
+ {
+ // The operands are single-use, but a LIR helper call can only take local vars as arguments.
+ shiftByOp = RepresentOpAsLocalVar(shiftByOp, tree, &tree->gtOp.gtOp2);
+ loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
- // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
- // and generate the shift instructions ourselves there, rather than replacing it with a helper call.
+ Range().Remove(shiftByOp);
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
- unsigned helper;
+ unsigned helper;
- switch (oper)
+ switch (oper)
+ {
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ break;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ break;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ break;
+ default:
+ unreached();
+ }
+
+ GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp);
+
+ GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+ call->gtFlags |= tree->gtFlags & GTF_ALL_EFFECT;
+
+ GenTreeCall* callNode = call->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(m_compiler);
+
+ call = m_compiler->fgMorphArgs(callNode);
+ Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, call);
+ return call;
+ }
+}
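For the constant-shift cases above, the shl/shld, shrd/shr, and shrd/sar pairs implement the standard double-precision shift identities. A hedged plain-C++ sketch for a shift amount 0 < n < 32 (names illustrative; the n == 0, n >= 32, and n >= 64 paths above instead drop the shift, move one half into the other, or zero/sign-fill):

    #include <cstdint>

    // 64-bit value held as (lo, hi), shifted by a constant n with 0 < n < 32.
    void ShiftLongHalves(uint32_t lo, uint32_t hi, unsigned n,
                         uint32_t* loOut, uint32_t* hiOut, bool isLeft, bool isArithmetic)
    {
        if (isLeft) // GT_LSH: shl lo / shld hi
        {
            *loOut = lo << n;
            *hiOut = (hi << n) | (lo >> (32 - n)); // shld hi, loCopy, n
        }
        else if (!isArithmetic) // GT_RSZ: shrd lo / shr hi
        {
            *loOut = (lo >> n) | (hi << (32 - n)); // shrd lo, hiCopy, n
            *hiOut = hi >> n;
        }
        else // GT_RSH: shrd lo / sar hi
        {
            *loOut = (lo >> n) | (hi << (32 - n)); // shrd lo, hiCopy, n
            *hiOut = (uint32_t)((int32_t)hi >> n); // sar hi, n
        }
    }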
+
+//------------------------------------------------------------------------
+// DecomposeRotate: Decompose GT_ROL and GT_ROR with constant rotate amounts. We can
+// inspect the rotate amount and decompose to the appropriate node types, generating
+// a shld/shld pattern for GT_ROL and a shrd/shrd pattern for GT_ROR for most rotate
+// amounts.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ GenTree* rotateByOp = tree->gtGetOp2();
+
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_ROL) || (oper == GT_ROR));
+ assert(rotateByOp->IsCnsIntOrI());
+
+ // For longs, we need to change rols into two GT_LSH_HIs and rors into two GT_RSH_LOs
+ // so we will get:
+ //
+ // shld lo, hi, rotateAmount
+ // shld hi, loCopy, rotateAmount
+ //
+ // or:
+ //
+ // shrd lo, hi, rotateAmount
+ // shrd hi, loCopy, rotateAmount
+
+ if (oper == GT_ROL)
{
- case GT_LSH:
- helper = CORINFO_HELP_LLSH;
- break;
- case GT_RSH:
- helper = CORINFO_HELP_LRSH;
- break;
- case GT_RSZ:
- helper = CORINFO_HELP_LRSZ;
- break;
- default:
- unreached();
+ oper = GT_LSH_HI;
+ }
+ else
+ {
+ oper = GT_RSH_LO;
}
- GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);
+ unsigned int count = rotateByOp->gtIntCon.gtIconVal;
+ Range().Remove(rotateByOp);
+
+ // Make sure the rotate amount is between 1 and 63.
+ assert((count < 64) && (count != 0));
+
+ GenTree* loResult;
+ GenTree* hiResult;
+
+ if (count == 32)
+ {
+ // If the rotate amount is 32, then swap hi and lo
+ LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ hiResult = loOp1Use.Def();
+ loResult = hiOp1Use.Def();
+ gtLong->gtOp.gtOp1 = loResult;
+ gtLong->gtOp.gtOp2 = hiResult;
+
+ GenTree* next = tree->gtNext;
+ // Remove tree and don't do anything else.
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, gtLong);
+ return next;
+ }
+ else
+ {
+ GenTree* loOp1;
+ GenTree* hiOp1;
+
+ if (count > 32)
+ {
+ // If count > 32, we swap hi and lo, and subtract 32 from count
+ hiOp1 = gtLong->gtGetOp1();
+ loOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+ loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp2);
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp1);
+
+ count -= 32;
+ }
+ else
+ {
+ loOp1 = gtLong->gtGetOp1();
+ hiOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+ loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
+ hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
+ }
+
+ unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum;
+ unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
+
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
+
+ GenTree* rotateByHi = m_compiler->gtNewIconNode(count, TYP_INT);
+ GenTree* rotateByLo = m_compiler->gtNewIconNode(count, TYP_INT);
+
+ // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to
+ // generate the shld/shrd instruction
+ GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT);
+ GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, hiCopy, loOp1);
+ loResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp, rotateByLo);
+
+ // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to
+ // generate the shld/shrd instruction
+ GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT);
+ GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1);
+ hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp, rotateByHi);
+
+ m_compiler->lvaIncRefCnts(loCopy);
+ m_compiler->lvaIncRefCnts(hiCopy);
+
+ Range().InsertBefore(tree, hiCopy, loOp1, loOp);
+ Range().InsertBefore(tree, rotateByLo, loResult);
+ Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
+ Range().InsertBefore(tree, rotateByHi, hiResult);
+
+ Range().Remove(tree);
+
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+ }
+}
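The rotate decomposition above relies on a 64-bit rotate being symmetric in the two halves: each output half is a funnel shift of the pair, which is exactly what shld/shrd compute. A minimal sketch for a left rotate with 0 < n < 32 (the n > 32 path above swaps the halves and uses n - 32, and n == 32 is a pure swap; helper name illustrative):

    #include <cstdint>

    // rol64 expressed on 32-bit halves for 0 < n < 32; ror64 is the mirror image using shrd.
    void RotateLeftLongHalves(uint32_t lo, uint32_t hi, unsigned n, uint32_t* loOut, uint32_t* hiOut)
    {
        *loOut = (lo << n) | (hi >> (32 - n)); // shld lo, hi, n
        *hiOut = (hi << n) | (lo >> (32 - n)); // shld hi, loCopy, n
    }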
+
+//------------------------------------------------------------------------
+// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are
+// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that
+// returns its result in two registers like GT_CALLs do. Additionally, these muls are
+// guaranteed to be in the form long = (long)int * (long)int. Therefore, to decompose
+// these nodes, we convert them into GT_MUL_LONGs, undo the cast from int to long by
+// stripping out the lo ops, and force them into the form var = mul, as we do for
+// GT_CALLs. In codegen, we then produce a mul instruction that produces the result
+// in edx:eax, and store those registers on the stack in genStoreLongLclVar.
+//
+// All other GT_MULs have been converted to helper calls in morph.cpp
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeMul(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_MUL);
+ assert((tree->gtFlags & GTF_MUL_64RSLT) != 0);
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ GenTree* loOp1 = op1->gtGetOp1();
+ GenTree* hiOp1 = op1->gtGetOp2();
+ GenTree* loOp2 = op2->gtGetOp1();
+ GenTree* hiOp2 = op2->gtGetOp2();
+
+ Range().Remove(hiOp1);
+ Range().Remove(hiOp2);
+ Range().Remove(op1);
+ Range().Remove(op2);
+
+ // Get rid of the hi ops. We don't need them.
+ tree->gtOp.gtOp1 = loOp1;
+ tree->gtOp.gtOp2 = loOp2;
+ tree->SetOperRaw(GT_MUL_LONG);
+
+ return StoreNodeToVar(use);
+}
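The GT_MUL_LONG produced above is the widening 32x32->64 multiply that x86 mul/imul leaves in edx:eax; dropping the hi halves of the GT_LONG operands is what undoes the int->long casts. A sketch of the value it computes, shown for the unsigned case (the signed form is the same shape with imul; helper name illustrative):

    #include <cstdint>

    // Widening 32x32 -> 64 multiply: the result lands in two registers (edx:eax on x86).
    void MulLongHalves(uint32_t a, uint32_t b, uint32_t* loOut, uint32_t* hiOut)
    {
        uint64_t product = (uint64_t)a * (uint64_t)b;
        *loOut = (uint32_t)product;         // eax
        *hiOut = (uint32_t)(product >> 32); // edx
    }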
+
+//------------------------------------------------------------------------
+// DecomposeUMod: Decompose GT_UMOD. The only GT_UMODs that make it to decompose
+// are guaranteed to be an unsigned long mod whose op2 is a cast to long of a
+// constant int whose value is between 2 and 0x3fffffff. All other GT_UMODs are
+// morphed into helper calls. These GT_UMODs will actually return an int value in
+// EDX. In decompose, we make the lo operation a TYP_INT GT_UMOD, with op2 as the
+// original lo half and op1 as a GT_LONG. We make the hi part 0, so we end up with:
+//
+// GT_UMOD[TYP_INT] ( GT_LONG [TYP_LONG] (loOp1, hiOp1), loOp2 [TYP_INT] )
+//
+// With the expectation that we will generate:
+//
+// EDX = hiOp1
+// EAX = loOp1
+// reg = loOp2
+// div reg
+// EDX is the remainder, and result of GT_UMOD
+// mov hiReg = 0
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_UMOD);
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ assert(op1->OperGet() == GT_LONG);
+ assert(op2->OperGet() == GT_LONG);
+
+ GenTree* loOp2 = op2->gtGetOp1();
+ GenTree* hiOp2 = op2->gtGetOp2();
+
+ assert(loOp2->OperGet() == GT_CNS_INT);
+ assert(hiOp2->OperGet() == GT_CNS_INT);
+ assert((loOp2->gtIntCon.gtIconVal >= 2) && (loOp2->gtIntCon.gtIconVal <= 0x3fffffff));
+ assert(hiOp2->gtIntCon.gtIconVal == 0);
+
+ // Get rid of op2's hi part. We don't need it.
+ Range().Remove(hiOp2);
+ Range().Remove(op2);
+
+ // Lo part is the GT_UMOD
+ GenTree* loResult = tree;
+ loResult->gtOp.gtOp2 = loOp2;
+ loResult->gtType = TYP_INT;
- GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+ // Set the high part to 0
+ GenTree* hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
- GenTreeCall* callNode = call->AsCall();
- ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
- retTypeDesc->InitializeLongReturnType(m_compiler);
+ Range().InsertAfter(loResult, hiResult);
- call = m_compiler->fgMorphArgs(callNode);
- Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
-
- Range().Remove(tree);
- use.ReplaceWith(m_compiler, call);
- return call;
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
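The high half of the GT_UMOD result can be pinned to zero above because the remainder of an unsigned mod by a divisor in [2, 0x3fffffff] is always strictly less than the divisor and therefore fits in the low 32 bits. A small sketch of that invariant (illustrative only; helper name not part of the JIT):

    #include <cassert>
    #include <cstdint>

    // Unsigned 64-bit mod by a small constant divisor: the remainder fits in the low half.
    void UModLongHalves(uint64_t dividend, uint32_t divisor, uint32_t* loOut, uint32_t* hiOut)
    {
        assert((divisor >= 2) && (divisor <= 0x3fffffff));
        uint64_t remainder = dividend % divisor;
        *loOut = (uint32_t)remainder; // the value left in EDX after the div
        *hiOut = 0;                   // remainder < divisor <= 0x3fffffff, so the high half is always 0
    }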
+
+//------------------------------------------------------------------------
+// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't,
+// store the node to a var. Then decompose the new LclVar.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use)
+{
+ if (use.IsDummyUse())
+ return use.Def()->gtNext;
+
+ GenTree* tree = use.Def();
+ GenTree* user = use.User();
+
+ if (user->OperGet() == GT_STORE_LCL_VAR)
+ {
+ // If parent is already a STORE_LCL_VAR, we can skip it if
+ // it is already marked as lvIsMultiRegRet.
+ unsigned varNum = user->AsLclVarCommon()->gtLclNum;
+ if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
+ {
+ return tree->gtNext;
+ }
+ else if (!m_compiler->lvaTable[varNum].lvPromoted)
+ {
+ // If var wasn't promoted, we can just set lvIsMultiRegRet.
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+ return tree->gtNext;
+ }
+ }
+
+ // Otherwise, we need to force var = call()
+ unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+
+ // Decompose the new LclVar use
+ return DecomposeLclVar(use);
+}
+
+//------------------------------------------------------------------------
+// RepresentOpAsLocalVar: Check whether op is already a local var; if not, store it to a local.
+//
+// Arguments:
+// op - GenTree* to represent as local variable
+// user - user of op
+// edge - edge from user to op
+//
+// Return Value:
+// op represented as local var
+//
+GenTree* DecomposeLongs::RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge)
+{
+ if (op->OperGet() == GT_LCL_VAR)
+ {
+ return op;
+ }
+ else
+ {
+ LIR::Use opUse(Range(), edge, user);
+ opUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ return *edge;
+ }
}
//------------------------------------------------------------------------
@@ -965,9 +1648,6 @@ genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper)
case GT_SUB:
return GT_SUB_HI;
break;
- case GT_MUL:
- return GT_MUL_HI;
- break;
case GT_DIV:
return GT_DIV_HI;
break;
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index af9b342fb2..8965a0b330 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -35,13 +35,14 @@ private:
}
// Driver functions
- void DecomposeRangeHelper();
- GenTree* DecomposeNode(LIR::Use& use);
+ void DecomposeRangeHelper();
+ GenTree* DecomposeNode(GenTree* tree);
// Per-node type decompose cases
GenTree* DecomposeLclVar(LIR::Use& use);
GenTree* DecomposeLclFld(LIR::Use& use);
GenTree* DecomposeStoreLclVar(LIR::Use& use);
+ GenTree* DecomposeStoreLclFld(LIR::Use& use);
GenTree* DecomposeCast(LIR::Use& use);
GenTree* DecomposeCnsLng(LIR::Use& use);
GenTree* DecomposeCall(LIR::Use& use);
@@ -51,10 +52,15 @@ private:
GenTree* DecomposeNeg(LIR::Use& use);
GenTree* DecomposeArith(LIR::Use& use);
GenTree* DecomposeShift(LIR::Use& use);
+ GenTree* DecomposeRotate(LIR::Use& use);
+ GenTree* DecomposeMul(LIR::Use& use);
+ GenTree* DecomposeUMod(LIR::Use& use);
// Helper functions
- GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+ GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter);
+ GenTree* RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge);
+ GenTree* StoreNodeToVar(LIR::Use& use);
static genTreeOps GetHiOper(genTreeOps oper);
static genTreeOps GetLoOper(genTreeOps oper);
diff --git a/src/jit/dll/CMakeLists.txt b/src/jit/dll/CMakeLists.txt
index 01e58dbbb8..43ed07eae5 100644
--- a/src/jit/dll/CMakeLists.txt
+++ b/src/jit/dll/CMakeLists.txt
@@ -1,20 +1,20 @@
project(ClrJit)
-if(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+if(CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DLEGACY_BACKEND)
-endif(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+endif(CLR_CMAKE_TARGET_ARCH_ARM)
# Disable the following for UNIX altjit on Windows
if(CLR_CMAKE_PLATFORM_UNIX)
add_compile_options(-fPIC)
- add_library_clr(${JIT_BASE_NAME}_static
+ add_library_clr(clrjit_static
STATIC
${SHARED_LIB_SOURCES}
)
- add_dependencies(${JIT_BASE_NAME}_static coreclrpal gcinfo)
+ add_dependencies(clrjit_static coreclrpal gcinfo)
else()
- add_library_clr(${JIT_BASE_NAME}_static
+ add_library_clr(clrjit_static
${SOURCES}
)
# Disable up to here (see above) the following for UNIX altjit on Windows
diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj
index 97981e7eff..7505f5e8ef 100644
--- a/src/jit/dll/jit.nativeproj
+++ b/src/jit/dll/jit.nativeproj
@@ -37,9 +37,9 @@
<!-- Profile-guided optimization -->
- <PogoOptimize Condition="('$(BuildArchitecture)' == 'arm')">false</PogoOptimize>
- <PogoInstrument Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument>
- <PogoUpdate Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate>
+ <PogoOptimize Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm')">false</PogoOptimize>
+ <PogoInstrument Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument>
+ <PogoUpdate Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate>
<Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'amd64'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib</Win32DllLibs>
<Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'arm'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib;$(SdkLibPath)\ntdll.lib</Win32DllLibs>
<OptimizationDataRelativeDir>$(_BuildArch)\CLR\Base</OptimizationDataRelativeDir>
diff --git a/src/jit/earlyprop.cpp b/src/jit/earlyprop.cpp
index 70d1012aa0..51de631d19 100644
--- a/src/jit/earlyprop.cpp
+++ b/src/jit/earlyprop.cpp
@@ -189,8 +189,7 @@ void Compiler::optEarlyProp()
// Walk the stmt tree in linear order to rewrite any array length reference with a
// constant array length.
- bool isRewritten = false;
- bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+ bool isRewritten = false;
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
{
if (optEarlyPropRewriteTree(tree))
@@ -238,12 +237,8 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
objectRefPtr = tree->gtOp.gtOp1;
propKind = optPropKind::OPK_ARRAYLEN;
}
- else if ((tree->OperGet() == GT_IND) && !varTypeIsStruct(tree))
+ else if (tree->OperIsIndir())
{
- // TODO-1stClassStructs: The above condition should apply equally to all indirections,
- // but previously the implicit indirections due to a struct assignment were not
- // considered, so we are currently limiting it to non-structs to preserve existing
- // behavior.
// optFoldNullCheck takes care of updating statement info if a null check is removed.
optFoldNullCheck(tree);
@@ -259,7 +254,7 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
return false;
}
- objectRefPtr = tree->gtOp.gtOp1;
+ objectRefPtr = tree->AsIndir()->Addr();
propKind = optPropKind::OPK_OBJ_GETTYPE;
}
else
@@ -511,15 +506,23 @@ void Compiler::optFoldNullCheck(GenTreePtr tree)
// |
// x
- assert(tree->OperGet() == GT_IND);
- if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ if ((compCurBB->bbFlags & BBF_HAS_NULLCHECK) == 0)
+ {
+ return;
+ }
+
+ assert(tree->OperIsIndir());
+
+ GenTree* const addr = tree->AsIndir()->Addr();
+ if (addr->OperGet() == GT_LCL_VAR)
{
// Check if we have the pattern above and find the nullcheck node if we do.
// Find the definition of the indirected local (x in the picture)
- GenTreePtr indLocalTree = tree->gtGetOp1();
- unsigned lclNum = indLocalTree->AsLclVarCommon()->GetLclNum();
- unsigned ssaNum = indLocalTree->AsLclVarCommon()->GetSsaNum();
+ GenTreeLclVarCommon* const lclVarNode = addr->AsLclVarCommon();
+
+ const unsigned lclNum = lclVarNode->GetLclNum();
+ const unsigned ssaNum = lclVarNode->GetSsaNum();
if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
{
@@ -557,7 +560,7 @@ void Compiler::optFoldNullCheck(GenTreePtr tree)
{
// Walk from the use to the def in reverse execution order to see
// if any nodes have unsafe side effects.
- GenTreePtr currentTree = indLocalTree->gtPrev;
+ GenTreePtr currentTree = lclVarNode->gtPrev;
bool isInsideTry = compCurBB->hasTryIndex();
bool canRemoveNullCheck = true;
const unsigned maxNodesWalked = 25;
@@ -612,13 +615,8 @@ void Compiler::optFoldNullCheck(GenTreePtr tree)
additionNode->gtFlags & (GTF_EXCEPT | GTF_DONT_CSE);
// Re-morph the statement.
- fgMorphBlockStmt(compCurBB, curStmt DEBUGARG("optFoldNullCheck"));
-
- // Recalculate the gtCostSz, etc...
- gtSetStmtInfo(curStmt);
-
- // Re-thread the nodes
- fgSetStmtSeq(curStmt);
+ fgMorphBlockStmt(compCurBB,
+ curStmt->AsStmt() DEBUGARG("optFoldNullCheck"));
}
}
}
@@ -668,4 +666,4 @@ bool Compiler::optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry)
}
}
return result;
-}
\ No newline at end of file
+}
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index 527244221e..dcadaa9453 100755..100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -284,21 +284,17 @@ CorJitResult CILJit::compileMethod(
return g_realJitCompiler->compileMethod(compHnd, methodInfo, flags, entryAddress, nativeSizeOfCode);
}
- CORJIT_FLAGS jitFlags = {0};
+ JitFlags jitFlags;
- DWORD jitFlagsSize = 0;
#if COR_JIT_EE_VERSION > 460
- if (flags == CORJIT_FLG_CALL_GETJITFLAGS)
- {
- jitFlagsSize = compHnd->getJitFlags(&jitFlags, sizeof(jitFlags));
- }
-#endif
-
- assert(jitFlagsSize <= sizeof(jitFlags));
- if (jitFlagsSize == 0)
- {
- jitFlags.corJitFlags = flags;
- }
+ assert(flags == CORJIT_FLAGS::CORJIT_FLAG_CALL_GETJITFLAGS);
+ CORJIT_FLAGS corJitFlags;
+ DWORD jitFlagsSize = compHnd->getJitFlags(&corJitFlags, sizeof(corJitFlags));
+ assert(jitFlagsSize == sizeof(corJitFlags));
+ jitFlags.SetFromFlags(corJitFlags);
+#else // COR_JIT_EE_VERSION <= 460
+ jitFlags.SetFromOldFlags(flags, 0);
+#endif // COR_JIT_EE_VERSION <= 460
int result;
void* methodCodePtr = nullptr;
@@ -385,17 +381,31 @@ void CILJit::getVersionIdentifier(GUID* versionIdentifier)
/*****************************************************************************
* Determine the maximum length of SIMD vector supported by this JIT.
*/
+
+#if COR_JIT_EE_VERSION > 460
+unsigned CILJit::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags)
+#else
unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
+#endif
{
if (g_realJitCompiler != nullptr)
{
return g_realJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags);
}
-#ifdef _TARGET_AMD64_
+ JitFlags jitFlags;
+
+#if COR_JIT_EE_VERSION > 460
+ jitFlags.SetFromFlags(cpuCompileFlags);
+#else // COR_JIT_EE_VERSION <= 460
+ jitFlags.SetFromOldFlags(cpuCompileFlags, 0);
+#endif // COR_JIT_EE_VERSION <= 460
+
+#ifdef FEATURE_SIMD
+#ifdef _TARGET_XARCH_
#ifdef FEATURE_AVX_SUPPORT
- if (((cpuCompileFlags & CORJIT_FLG_PREJIT) == 0) && ((cpuCompileFlags & CORJIT_FLG_FEATURE_SIMD) != 0) &&
- ((cpuCompileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD) &&
+ jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
if (JitConfig.EnableAVX() != 0)
{
@@ -404,9 +414,10 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
}
#endif // FEATURE_AVX_SUPPORT
return 16;
-#else // !_TARGET_AMD64_
+#endif // _TARGET_XARCH_
+#else // !FEATURE_SIMD
return 0;
-#endif // !_TARGET_AMD64_
+#endif // !FEATURE_SIMD
}
void CILJit::setRealJit(ICorJitCompiler* realJitCompiler)
@@ -1378,7 +1389,7 @@ bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
* Utility functions
*/
-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(FEATURE_TRACELOGGING)
/*****************************************************************************/
@@ -1526,6 +1537,9 @@ const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd)
const wchar_t* Compiler::eeGetCPString(size_t strHandle)
{
+#ifdef FEATURE_PAL
+ return nullptr;
+#else
char buff[512 + sizeof(CORINFO_String)];
// make this bulletproof, so it works even if we are wrong.
@@ -1547,6 +1561,7 @@ const wchar_t* Compiler::eeGetCPString(size_t strHandle)
}
return (asString->chars);
+#endif // FEATURE_PAL
}
#endif // DEBUG
diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp
index d9bf95fde8..3899d92192 100644
--- a/src/jit/ee_il_dll.hpp
+++ b/src/jit/ee_il_dll.hpp
@@ -21,7 +21,11 @@ class CILJit : public ICorJitCompiler
void getVersionIdentifier(GUID* versionIdentifier /* OUT */
);
+#if COR_JIT_EE_VERSION > 460
+ unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags);
+#else
unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags);
+#endif
void setRealJit(ICorJitCompiler* realJitCompiler);
};
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 5c991ddf1b..0929b7392e 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -1264,9 +1264,9 @@ void* emitter::emitAllocInstr(size_t sz, emitAttr opsz)
// ARM - This is currently broken on _TARGET_ARM_
// When nopSize is odd we misalign emitCurIGsize
//
- if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT) && !emitInInstrumentation &&
- !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog
- && !emitIGisInEpilog(emitCurIG) &&
+ if (!emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !emitInInstrumentation &&
+ !emitIGisInProlog(emitCurIG) && // don't do this in prolog or epilog
+ !emitIGisInEpilog(emitCurIG) &&
emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline)
)
{
@@ -1670,13 +1670,9 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
emitCurIGsize += MAX_PLACEHOLDER_IG_SIZE;
emitCurCodeOffset += emitCurIGsize;
-#ifdef DEBUGGING_SUPPORT
-
#if FEATURE_EH_FUNCLETS
// Add the appropriate IP mapping debugging record for this placeholder
- // group.
-
- // genExitCode() adds the mapping for main function epilogs
+ // group. genExitCode() adds the mapping for main function epilogs.
if (emitComp->opts.compDbgInfo)
{
if (igType == IGPT_FUNCLET_PROLOG)
@@ -1690,8 +1686,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
}
#endif // FEATURE_EH_FUNCLETS
-#endif // DEBUGGING_SUPPORT
-
/* Start a new IG if more code follows */
if (last)
@@ -2320,7 +2314,7 @@ bool emitter::emitNoGChelper(unsigned IHX)
case CORINFO_HELP_PROF_FCN_LEAVE:
case CORINFO_HELP_PROF_FCN_ENTER:
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))
case CORINFO_HELP_PROF_FCN_TAILCALL:
#endif
case CORINFO_HELP_LLSH:
@@ -3414,8 +3408,6 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
#endif
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
/* Did the size of the instruction match our expectations? */
UNATIVE_OFFSET csz = (UNATIVE_OFFSET)(*dp - curInsAdr);
@@ -3447,8 +3439,6 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
#endif
}
-#endif
-
#ifdef DEBUG
/* Make sure the instruction descriptor size also matches our expectations */
if (is != emitSizeOfInsDsc(id))
@@ -6048,7 +6038,7 @@ unsigned char emitter::emitOutputLong(BYTE* dst, ssize_t val)
#ifdef DEBUG
if (emitComp->opts.dspEmit)
{
- printf("; emit_long 0%08XH\n", val);
+ printf("; emit_long 0%08XH\n", (int)val);
}
#ifdef _TARGET_AMD64_
// if we're emitting code bytes, ensure that we've already emitted the rex prefix!
@@ -6072,9 +6062,9 @@ unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val)
if (emitComp->opts.dspEmit)
{
#ifdef _TARGET_AMD64_
- printf("; emit_size_t 0%016llXH\n", (size_t)val);
+ printf("; emit_size_t 0%016llXH\n", val);
#else // _TARGET_AMD64_
- printf("; emit_size_t 0%08XH\n", (size_t)val);
+ printf("; emit_size_t 0%08XH\n", val);
#endif // _TARGET_AMD64_
}
#endif // DEBUG
@@ -6082,6 +6072,60 @@ unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val)
return sizeof(size_t);
}
+//------------------------------------------------------------------------
+// Wrappers to emitOutputByte, emitOutputWord, emitOutputLong, emitOutputSizeT
+// that take unsigned __int64 or size_t type instead of ssize_t. Used on RyuJIT/x86.
+//
+// Arguments:
+// dst - passed through
+// val - passed through
+//
+// Return Value:
+// Same as wrapped function.
+//
+
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+unsigned char emitter::emitOutputByte(BYTE* dst, size_t val)
+{
+ return emitOutputByte(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputWord(BYTE* dst, size_t val)
+{
+ return emitOutputWord(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputLong(BYTE* dst, size_t val)
+{
+ return emitOutputLong(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputSizeT(BYTE* dst, size_t val)
+{
+ return emitOutputSizeT(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputByte(BYTE* dst, unsigned __int64 val)
+{
+ return emitOutputByte(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputWord(BYTE* dst, unsigned __int64 val)
+{
+ return emitOutputWord(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputLong(BYTE* dst, unsigned __int64 val)
+{
+ return emitOutputLong(dst, (ssize_t)val);
+}
+
+unsigned char emitter::emitOutputSizeT(BYTE* dst, unsigned __int64 val)
+{
+ return emitOutputSizeT(dst, (ssize_t)val);
+}
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
/*****************************************************************************
*
* Given a block cookie and a code position, return the actual code offset;
diff --git a/src/jit/emit.h b/src/jit/emit.h
index 8fb24bcd60..5b1a395379 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -427,6 +427,11 @@ public:
// There seem to be some cases where this is used without being initialized via CodeGen::inst_set_SV_var().
emitVarRefOffs = 0;
#endif // DEBUG
+
+#ifdef _TARGET_XARCH_
+ SetUseSSE3_4(false);
+#endif // _TARGET_XARCH_
+
#ifdef FEATURE_AVX_SUPPORT
SetUseAVX(false);
#endif // FEATURE_AVX_SUPPORT
@@ -1659,6 +1664,18 @@ private:
unsigned char emitOutputLong(BYTE* dst, ssize_t val);
unsigned char emitOutputSizeT(BYTE* dst, ssize_t val);
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+ unsigned char emitOutputByte(BYTE* dst, size_t val);
+ unsigned char emitOutputWord(BYTE* dst, size_t val);
+ unsigned char emitOutputLong(BYTE* dst, size_t val);
+ unsigned char emitOutputSizeT(BYTE* dst, size_t val);
+
+ unsigned char emitOutputByte(BYTE* dst, unsigned __int64 val);
+ unsigned char emitOutputWord(BYTE* dst, unsigned __int64 val);
+ unsigned char emitOutputLong(BYTE* dst, unsigned __int64 val);
+ unsigned char emitOutputSizeT(BYTE* dst, unsigned __int64 val);
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp);
size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp);
@@ -1742,8 +1759,8 @@ private:
BYTE* emitCurIGfreeEndp; // one byte past the last available byte in buffer
BYTE* emitCurIGfreeBase; // first byte address
- unsigned emitCurIGinsCnt; // # of collected instr's in buffer
- unsigned emitCurIGsize; // estimated code size of current group in bytes
+ unsigned emitCurIGinsCnt; // # of collected instr's in buffer
+ unsigned emitCurIGsize; // estimated code size of current group in bytes
UNATIVE_OFFSET emitCurCodeOffset; // current code offset within group
UNATIVE_OFFSET emitTotalCodeSize; // bytes of code in entire method
@@ -1822,8 +1839,12 @@ private:
void emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig);
void emitNewIG();
+
+#if !defined(JIT32_GCENCODER)
void emitDisableGC();
void emitEnableGC();
+#endif // !defined(JIT32_GCENCODER)
+
void emitGenIG(insGroup* ig);
insGroup* emitSavIG(bool emitAdd = false);
void emitNxtIG(bool emitAdd = false);
@@ -2707,6 +2728,7 @@ inline void emitter::emitNewIG()
emitGenIG(ig);
}
+#if !defined(JIT32_GCENCODER)
// Start a new instruction group that is not interruptable
inline void emitter::emitDisableGC()
{
@@ -2736,6 +2758,7 @@ inline void emitter::emitEnableGC()
// instruction groups.
emitForceNewIG = true;
}
+#endif // !defined(JIT32_GCENCODER)
/*****************************************************************************/
#endif // _EMIT_H_
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index 1f57048a80..45928ca2d2 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -4368,6 +4368,7 @@ void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNu
*
* EC_FUNC_TOKEN : addr is the method address
* EC_FUNC_ADDR : addr is the absolute address of the function
+ * if addr is NULL, it is a recursive call
*
* If callType is one of these emitCallTypes, addr has to be NULL.
* EC_INDIR_R : "call ireg".
@@ -4463,13 +4464,11 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(argSize % (int)sizeof(void*) == 0);
argCnt = argSize / (int)sizeof(void*);
-#ifdef DEBUGGING_SUPPORT
/* Managed RetVal: emit sequence point for the call */
if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
{
codeGen->genIPmappingAdd(ilOffset, false);
}
-#endif
/*
We need to allocate the appropriate instruction descriptor based
@@ -4555,8 +4554,8 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
- assert(addr != NULL);
- assert(codeGen->validImmForBL((ssize_t)addr));
+ // if addr is nullptr then this call is treated as a recursive call.
+ assert(addr == nullptr || codeGen->arm_Valid_Imm_For_BL((ssize_t)addr));
if (isJump)
{
@@ -5266,8 +5265,8 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
else
#endif
{
- assert(distVal >= -16777216);
- assert(distVal <= 16777214);
+ assert(distVal >= CALL_DIST_MAX_NEG);
+ assert(distVal <= CALL_DIST_MAX_POS);
if (distVal < 0)
code |= 1 << 26;
@@ -6211,7 +6210,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = sizeof(instrDesc);
}
- addr = id->idAddr()->iiaAddr;
+ if (id->idAddr()->iiaAddr == NULL) /* a recursive call */
+ {
+ addr = emitCodeBlock;
+ }
+ else
+ {
+ addr = id->idAddr()->iiaAddr;
+ }
code = emitInsCode(ins, fmt);
#ifdef RELOC_SUPPORT
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index a632ec12c8..12c4087115 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -6738,13 +6738,11 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(argSize % REGSIZE_BYTES == 0);
argCnt = (int)(argSize / (int)sizeof(void*));
-#ifdef DEBUGGING_SUPPORT
/* Managed RetVal: emit sequence point for the call */
if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
{
codeGen->genIPmappingAdd(ilOffset, false);
}
-#endif
/*
We need to allocate the appropriate instruction descriptor based
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index d43f766ee8..b6bacfa520 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -30,6 +30,15 @@ bool IsSSE2Instruction(instruction ins)
return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION);
}
+bool IsSSE4Instruction(instruction ins)
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else
+ return (ins >= INS_FIRST_SSE4_INSTRUCTION && ins <= INS_LAST_SSE4_INSTRUCTION);
+#endif
+}
+
bool IsSSEOrAVXInstruction(instruction ins)
{
#ifdef FEATURE_AVX_SUPPORT
@@ -48,7 +57,9 @@ bool emitter::IsAVXInstruction(instruction ins)
#endif
}
+#ifdef _TARGET_AMD64_
#define REX_PREFIX_MASK 0xFF00000000LL
+#endif // _TARGET_AMD64_
#ifdef FEATURE_AVX_SUPPORT
// Returns true if the AVX instruction is a binary operator that requires 3 operands.
@@ -75,10 +86,8 @@ bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins)
ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb ||
ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw ||
ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub ||
- ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 ||
- ins == INS_punpckldq
-
- );
+ ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps ||
+ ins == INS_vinsertf128 || ins == INS_punpckldq || ins == INS_phaddd);
}
// Returns true if the AVX instruction is a move operator that requires 3 operands.
@@ -92,22 +101,45 @@ bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins)
return IsAVXInstruction(ins) &&
(ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss);
}
-#endif // FEATURE_AVX_SUPPORT
-// Returns true if the AVX instruction is a 4-byte opcode.
+// ------------------------------------------------------------------------------
+// Is4ByteAVXInstruction: Returns true if the AVX instruction is a 4-byte opcode.
+//
+// Arguments:
+// ins - instruction
+//
// Note that this should be true for any of the instructions in instrsXArch.h
// that use the SSE38 or SSE3A macro.
+//
// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this
// needs to be addressed by expanding instruction encodings.
-bool Is4ByteAVXInstruction(instruction ins)
+bool emitter::Is4ByteAVXInstruction(instruction ins)
{
-#ifdef FEATURE_AVX_SUPPORT
- return (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq ||
+ return UseAVX() &&
+ (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq ||
ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw ||
ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 ||
- ins == INS_pmulld);
-#else
+ ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd);
+}
+#endif // FEATURE_AVX_SUPPORT
+
+// -------------------------------------------------------------------
+// Is4ByteSSE4Instruction: Returns true if the SSE4 instruction
+// is a 4-byte opcode.
+//
+// Arguments:
+// ins - instruction
+//
+// Note that this should be true for any of the instructions in instrsXArch.h
+// that use the SSE38 or SSE3A macro.
+bool emitter::Is4ByteSSE4Instruction(instruction ins)
+{
+#ifdef LEGACY_BACKEND
+ // On the legacy backend, SSE3_4 is not enabled.
return false;
+#else
+ return UseSSE3_4() && (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq ||
+ ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd);
#endif
}
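A minimal standalone sketch of what a "4-byte opcode" means for the SSE4 path added above, assuming the standard Intel encoding of pmulld (66 0F 38 40 /r); the extra 0x38 escape byte is why the emitter has to output a third opcode byte before the ModR/M byte. Illustration only, not emitter code.

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // pmulld xmm1, xmm2: mod=11 (register direct), reg=001 (xmm1), rm=010 (xmm2)
        const uint8_t bytes[] = {0x66, 0x0F, 0x38, 0x40, 0xC0 | (1 << 3) | 2};
        for (uint8_t b : bytes)
        {
            printf("%02X ", (unsigned)b); // prints: 66 0F 38 40 CA
        }
        printf("\n");
        return 0;
    }
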
@@ -150,8 +182,9 @@ bool emitter::TakesVexPrefix(instruction ins)
// prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar
// and AVX-128 bit operations.
#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
-#define LBIT_IN_3BYTE_VEX_PREFIX 0X00000400000000ULL
-size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr)
+#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL
+#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL
+emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr)
{
// Only AVX instructions require VEX prefix
assert(IsAVXInstruction(ins));
@@ -160,6 +193,7 @@ size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr)
assert(!hasVexPrefix(code));
// Set L bit to 1 in case of instructions that operate on 256-bits.
+ assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0);
code |= DEFAULT_3BYTE_VEX_PREFIX;
if (attr == EA_32BYTE)
{
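A minimal sketch of the byte layout behind DEFAULT_3BYTE_VEX_PREFIX and the new mask assert, assuming the prefix bytes sit above bit 32 of the accumulator as the constants above indicate; the L-bit OR is what the EA_32BYTE branch performs. Illustration only, not emitter code.

    #include <cstdint>
    #include <cstdio>

    static void dumpVexBytes(uint64_t code)
    {
        printf("%02X %02X %02X\n", (unsigned)((code >> 48) & 0xFF), (unsigned)((code >> 40) & 0xFF),
               (unsigned)((code >> 32) & 0xFF));
    }

    int main()
    {
        uint64_t code = 0xC4E07800000000ULL;      // DEFAULT_3BYTE_VEX_PREFIX
        dumpVexBytes(code);                       // C4 E0 78
        dumpVexBytes(code | 0x00000400000000ULL); // C4 E0 7C: L bit set for 256-bit operations
        return 0;
    }
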
@@ -296,25 +330,25 @@ bool IsXMMReg(regNumber reg)
}
// Returns bits to be encoded in instruction for the given register.
-regNumber RegEncoding(regNumber reg)
+unsigned RegEncoding(regNumber reg)
{
#ifndef LEGACY_BACKEND
// XMM registers do not share the same reg numbers as integer registers.
// But register encoding of integer and XMM registers is the same.
// Therefore, subtract XMMBASE from regNumber to get the register encoding
// in case of XMM registers.
- return (regNumber)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7);
+ return (unsigned)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7);
#else // LEGACY_BACKEND
// Legacy X86: XMM registers share the same reg numbers as integer registers and
// hence nothing to do to get reg encoding.
- return (regNumber)(reg & 0x7);
+ return (unsigned)(reg & 0x7);
#endif // LEGACY_BACKEND
}
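A small sketch of what the unsigned value returned above represents, assuming the architectural x86-64 numbering in which r8-r15 (and xmm8-xmm15) encode as 0-7 plus an extension bit carried in REX.B/R/X or the VEX prefix; the number used here is the architectural register number, not the JIT's regNumber value. Illustration only.

    #include <cstdio>

    int main()
    {
        unsigned r10 = 10; // architectural register number of r10
        printf("low 3 encoding bits: %u, needs REX extension bit: %d\n", r10 & 0x7, r10 >= 8); // 2, 1
        return 0;
    }
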
// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
// SSE2: separate 1-byte prefix gets added before opcode.
// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
-size_t emitter::AddRexWPrefix(instruction ins, size_t code)
+emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
{
#ifdef _TARGET_AMD64_
if (UseAVX() && IsAVXInstruction(ins))
@@ -335,7 +369,7 @@ size_t emitter::AddRexWPrefix(instruction ins, size_t code)
#ifdef _TARGET_AMD64_
-size_t emitter::AddRexRPrefix(instruction ins, size_t code)
+emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
{
@@ -349,7 +383,7 @@ size_t emitter::AddRexRPrefix(instruction ins, size_t code)
return code | 0x4400000000ULL;
}
-size_t emitter::AddRexXPrefix(instruction ins, size_t code)
+emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
{
@@ -363,7 +397,7 @@ size_t emitter::AddRexXPrefix(instruction ins, size_t code)
return code | 0x4200000000ULL;
}
-size_t emitter::AddRexBPrefix(instruction ins, size_t code)
+emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
{
@@ -378,12 +412,14 @@ size_t emitter::AddRexBPrefix(instruction ins, size_t code)
}
// Adds REX prefix (0x40) without W, R, X or B bits set
-size_t emitter::AddRexPrefix(instruction ins, size_t code)
+emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
{
assert(!UseAVX() || !IsAVXInstruction(ins));
return code | 0x4000000000ULL;
}
+#endif //_TARGET_AMD64_
+
bool isPrefix(BYTE b)
{
assert(b != 0); // Caller should check this
@@ -401,17 +437,15 @@ bool isPrefix(BYTE b)
return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
}
-#endif //_TARGET_AMD64_
-
// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
-unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code)
+unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code)
{
-#ifdef _TARGET_AMD64_ // TODO-x86: This needs to be enabled for AVX support on x86.
+#ifdef FEATURE_AVX_SUPPORT
if (hasVexPrefix(code))
{
// Only AVX instructions should have a VEX prefix
assert(UseAVX() && IsAVXInstruction(ins));
- size_t vexPrefix = (code >> 32) & 0x00FFFFFF;
+ code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
code &= 0x00000000FFFFFFFFLL;
WORD leadingBytes = 0;
@@ -504,7 +538,10 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s
emitOutputByte(dst + 2, vexPrefix & 0xFF);
return 3;
}
- else if (code > 0x00FFFFFFFFLL)
+#endif // FEATURE_AVX_SUPPORT
+
+#ifdef _TARGET_AMD64_
+ if (code > 0x00FFFFFFFFLL)
{
BYTE prefix = (code >> 32) & 0xFF;
noway_assert(prefix >= 0x40 && prefix <= 0x4F);
@@ -543,13 +580,13 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s
{
// 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
// Change to c2rrc1XXXX, and emit check2 now
- code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL));
+ code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL));
}
else
{
// 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
// Change to c2XXrrXXXX, and emit check2 now
- code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL));
+ code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL));
}
return emitOutputByte(dst, check2);
}
@@ -593,7 +630,6 @@ void emitter::emitOutputPreEpilogNOP()
// Size of rex prefix in bytes
unsigned emitter::emitGetRexPrefixSize(instruction ins)
{
-
// In case of AVX instructions, REX prefixes are part of VEX prefix.
// And hence requires no additional byte to encode REX prefixes.
if (IsAVXInstruction(ins))
@@ -630,7 +666,7 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
//=(opcodeSize - ExtrabytesSize) + vexPrefixSize
//=opcodeSize + (vexPrefixSize - ExtrabytesSize)
//=opcodeSize + vexPrefixAdjustedSize
-unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code)
+unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code)
{
#ifdef FEATURE_AVX_SUPPORT
if (IsAVXInstruction(ins))
@@ -674,19 +710,19 @@ unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, s
}
// Get size of rex or vex prefix emitted in code
-unsigned emitter::emitGetPrefixSize(size_t code)
+unsigned emitter::emitGetPrefixSize(code_t code)
{
-#ifdef FEATURE_AVX_SUPPORT
- if (code & VEX_PREFIX_MASK_3BYTE)
+ if (hasVexPrefix(code))
{
return 3;
}
- else
-#endif
- if (code & REX_PREFIX_MASK)
+
+#ifdef _TARGET_AMD64_
+ if (code & REX_PREFIX_MASK)
{
return 1;
}
+#endif // _TARGET_AMD64_
return 0;
}
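A hand-assembled illustration of the size contribution this helper models, assuming the standard encodings of "mov eax, ebx" (89 D8) and "mov rax, rbx" (48 89 D8): a REX prefix adds exactly one byte, while a VEX-encoded instruction pays three bytes up front instead. Illustration only, not emitter code.

    #include <cstdio>

    int main()
    {
        const unsigned baseSize = 2; // opcode 0x89 + ModR/M
        const unsigned rexSize  = 1; // REX.W = 0x48
        printf("mov eax, ebx: %u bytes\n", baseSize);           // 2
        printf("mov rax, rbx: %u bytes\n", baseSize + rexSize); // 3
        return 0;
    }
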
@@ -1058,7 +1094,7 @@ size_t insCodesMR[] =
// clang-format on
// Returns true iff the give CPU instruction has an MR encoding.
-inline size_t hasCodeMR(instruction ins)
+inline bool hasCodeMR(instruction ins)
{
assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0]));
return ((insCodesMR[ins] != BAD_CODE));
@@ -1083,7 +1119,7 @@ inline size_t insCodeMR(instruction ins)
* part of an opcode.
*/
-inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code)
+inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code)
{
assert(reg < REG_STK);
@@ -1106,16 +1142,16 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt
}
#endif // _TARGET_AMD64_
- reg = RegEncoding(reg);
- assert(reg < 8);
- return reg;
+ unsigned regBits = RegEncoding(reg);
#else // LEGACY_BACKEND
- assert(reg < 8);
- return reg;
+ unsigned regBits = reg;
#endif // LEGACY_BACKEND
+
+ assert(regBits < 8);
+ return regBits;
}
/*****************************************************************************
@@ -1124,7 +1160,7 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt
* part of an opcode.
*/
-inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code)
+inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code)
{
assert(reg < REG_STK);
@@ -1147,14 +1183,16 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt
}
#endif // _TARGET_AMD64_
- reg = RegEncoding(reg);
- assert(reg < 8);
- return (reg << 3);
+ unsigned regBits = RegEncoding(reg);
+
+#else // LEGACY_BACKEND
+
+ unsigned regBits = reg;
-#else // LEGACY_BACKEND
- assert(reg < 8);
- return (reg << 3);
#endif // LEGACY_BACKEND
+
+ assert(regBits < 8);
+ return (regBits << 3);
}
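A minimal sketch of where the "012" and "345" encodings land, assuming the usual ModR/M layout of mod (bits 6-7), reg (bits 3-5), and r/m (bits 0-2); that is why insEncodeReg345 returns the register bits shifted left by 3 while insEncodeReg012 returns them unshifted. Illustration only.

    #include <cstdio>

    int main()
    {
        unsigned mod    = 0x3;    // register-direct addressing
        unsigned reg345 = 1 << 3; // register #1 in the reg field
        unsigned reg012 = 2;      // register #2 in the r/m field
        printf("ModR/M = %02X\n", (mod << 6) | reg345 | reg012); // CA
        return 0;
    }
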
/***********************************************************************************
@@ -1162,7 +1200,7 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt
* Returns modified AVX opcode with the specified register encoded in bits 3-6 of
* byte 2 of VEX prefix.
*/
-inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code)
+inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code)
{
#ifdef FEATURE_AVX_SUPPORT
assert(reg < REG_STK);
@@ -1172,7 +1210,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr
// Get 4-bit register encoding
// RegEncoding() gives lower 3 bits
// IsExtendedReg() gives MSB.
- size_t regBits = RegEncoding(reg);
+ code_t regBits = RegEncoding(reg);
if (IsExtendedReg(reg))
{
regBits |= 0x08;
@@ -1196,7 +1234,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr
* Used exclusively to generate the REX.X bit and truncate the register.
*/
-inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code)
+inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code)
{
assert(reg < REG_STK);
@@ -1210,11 +1248,13 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t*
{
*code = AddRexXPrefix(ins, *code); // REX.X
}
- reg = RegEncoding(reg);
-#endif
+ unsigned regBits = RegEncoding(reg);
+#else // !_TARGET_AMD64_
+ unsigned regBits = reg;
+#endif // !_TARGET_AMD64_
- assert(reg < 8);
- return reg;
+ assert(regBits < 8);
+ return regBits;
}
/*****************************************************************************
@@ -1222,7 +1262,7 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t*
* Returns the "[r/m]" opcode with the mod/RM field set to register.
*/
-inline size_t emitter::insEncodeMRreg(instruction ins, size_t code)
+inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code)
{
// If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
// Otherwise, it will be placed after the 4 byte encoding.
@@ -1237,22 +1277,10 @@ inline size_t emitter::insEncodeMRreg(instruction ins, size_t code)
/*****************************************************************************
*
- * Returns the "[r/m], icon" opcode with the mod/RM field set to register.
- */
-
-inline size_t insEncodeMIreg(instruction ins, size_t code)
-{
- assert((code & 0xC000) == 0);
- code |= 0xC000;
- return code;
-}
-
-/*****************************************************************************
- *
* Returns the given "[r/m]" opcode with the mod/RM field set to register.
*/
-inline size_t insEncodeRMreg(instruction ins, size_t code)
+inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code)
{
// If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
// Otherwise, it will be placed after the 4 byte encoding.
@@ -1270,7 +1298,7 @@ inline size_t insEncodeRMreg(instruction ins, size_t code)
* the given register.
*/
-inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code)
{
assert((code & 0xC000) == 0);
code |= 0xC000;
@@ -1285,7 +1313,7 @@ inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr s
* the given register.
*/
-inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code)
{
assert((code & 0xC000) == 0);
code |= 0xC000;
@@ -1310,12 +1338,12 @@ inline bool insNeedsRRIb(instruction ins)
* Returns the "reg,reg,imm8" opcode with both the reg's set to the
* the given register.
*/
-inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
+inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
{
assert(size == EA_4BYTE); // All we handle for now.
assert(insNeedsRRIb(ins));
// If this list gets longer, use a switch, or a table lookup.
- size_t code = 0x69c0;
+ code_t code = 0x69c0;
unsigned regcode = insEncodeReg012(ins, reg, size, &code);
// We use the same register as source and destination. (Could have another version that does both regs...)
code |= regcode;
@@ -1329,9 +1357,9 @@ inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr si
* nibble of the opcode
*/
-inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
+inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
{
- size_t code = insCodeRR(ins);
+ code_t code = insCodeRR(ins);
unsigned regcode = insEncodeReg012(ins, reg, size, &code);
code |= regcode;
return code;
@@ -1342,7 +1370,7 @@ inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr s
* Return the 'SS' field value for the given index scale factor.
*/
-inline unsigned insSSval(unsigned scale)
+inline unsigned emitter::insSSval(unsigned scale)
{
assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
@@ -1447,7 +1475,7 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1
* Estimate the size (in bytes of generated code) of the given instruction.
*/
-inline UNATIVE_OFFSET emitter::emitInsSize(size_t code)
+inline UNATIVE_OFFSET emitter::emitInsSize(code_t code)
{
UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
#ifdef _TARGET_AMD64_
@@ -1466,18 +1494,17 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re
emitAttr size = EA_SIZE(attr);
UNATIVE_OFFSET sz;
-#ifdef _TARGET_AMD64_
- // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes.
+
+ // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes.
// Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
// This would probably be better expressed as a different format or something?
- if (insCodeRM(ins) & 0xFF00)
+ if ((insCodeRM(ins) & 0xFF00) != 0)
{
sz = 5;
}
else
-#endif // _TARGET_AMD64_
{
- size_t code = insCodeRM(ins);
+ code_t code = insCodeRM(ins);
sz = emitInsSize(insEncodeRMreg(ins, code));
}
@@ -1502,7 +1529,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re
/*****************************************************************************/
-inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp)
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
{
UNATIVE_OFFSET size = emitInsSize(code);
UNATIVE_OFFSET offs;
@@ -1777,7 +1804,7 @@ static bool baseRegisterRequiresDisplacement(regNumber base)
#endif
}
-UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code)
+UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
{
emitAttr attrSize = id->idOpSize();
instruction ins = id->idIns();
@@ -1994,7 +2021,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code)
return size;
}
-inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val)
+inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val)
{
instruction ins = id->idIns();
UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
@@ -2027,7 +2054,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val
return valSize + emitInsSizeAM(id, code);
}
-inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code)
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
{
instruction ins = id->idIns();
@@ -2047,7 +2074,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code)
return size + emitInsSize(code);
}
-inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val)
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val)
{
instruction ins = id->idIns();
UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
@@ -2252,7 +2279,7 @@ void emitter::emitIns(instruction ins)
{
UNATIVE_OFFSET sz;
instrDesc* id = emitNewInstr();
- size_t code = insCodeMR(ins);
+ code_t code = insCodeMR(ins);
#ifdef DEBUG
#if FEATURE_STACK_FP_X87
@@ -2328,7 +2355,7 @@ void emitter::emitIns(instruction ins, emitAttr attr)
{
UNATIVE_OFFSET sz;
instrDesc* id = emitNewInstr(attr);
- size_t code = insCodeMR(ins);
+ code_t code = insCodeMR(ins);
assert(ins == INS_cdq);
assert((code & 0xFFFFFF00) == 0);
sz = 1;
@@ -2499,8 +2526,9 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt,
// Absolute addresses marked as contained should fit within the base of addr mode.
assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
- // Either not generating relocatable code or addr must be an icon handle
- assert(!emitComp->opts.compReloc || memBase->IsIconHandle());
+ // Either not generating relocatable code, or addr must be an icon handle, or the
+ // constant is zero (which we won't generate a relocation for).
+ assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0));
if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
{
@@ -2904,6 +2932,19 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
varNum = tmpDsc->tdTempNum();
offset = 0;
}
+ else
+ {
+ // At this point we must have a memory operand that is a contained indir: if we do not, we should have handled
+ // this instruction above in the reg/imm or reg/reg case.
+ assert(mem != nullptr);
+ assert(memBase != nullptr);
+
+ if (memBase->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ varNum = memBase->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+ }
+ }
// Spill temp numbers are negative and start with -1
// which also happens to be BAD_VAR_NUM. For this reason
@@ -2911,7 +2952,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
if (varNum != BAD_VAR_NUM || tmpDsc != nullptr)
{
// Is the memory op in the source position?
- if (src->isContainedLclField() || src->isContainedLclVar() || src->isContainedSpillTemp())
+ if (src->isContainedMemoryOp())
{
if (instrHasImplicitRegPairDest(ins))
{
@@ -3351,22 +3392,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
/*****************************************************************************
@@ -3484,7 +3510,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t
sz += emitGetRexPrefixSize(ins);
}
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
assert(reg < 8);
#endif
@@ -3504,34 +3530,10 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
if (reg == REG_ESP)
{
- if (emitCntStackDepth)
- {
- if (ins == INS_sub)
- {
- S_UINT32 newStackLvl(emitCurStackLvl);
- newStackLvl += S_UINT32(val);
- noway_assert(!newStackLvl.IsOverflow());
-
- emitCurStackLvl = newStackLvl.Value();
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_add)
- {
- S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
- noway_assert(!newStackLvl.IsOverflow());
-
- emitCurStackLvl = newStackLvl.Value();
- }
- }
+ emitAdjustStackDepth(ins, val);
}
-
-#endif // !FEATURE_FIXED_OUT_ARGS
}
/*****************************************************************************
@@ -3584,17 +3586,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
/*****************************************************************************
@@ -3693,22 +3685,7 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
/*****************************************************************************
@@ -3757,11 +3734,14 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum
void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
{
- // SSE2 version requires 5 bytes and AVX version 6 bytes
+ // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes
UNATIVE_OFFSET sz = 4;
if (IsSSEOrAVXInstruction(ins))
{
- sz = UseAVX() ? 6 : 5;
+ // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
+ // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
+ // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
+ sz = (UseAVX() || UseSSE3_4()) ? 6 : 5;
}
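Concrete byte counts behind the 5-versus-6 estimate above, assuming the standard encodings of one representative per class (pshufd for SSE2, insertps for SSE4.1, vinsertps for AVX); a sketch only, not the emitter's sizing logic.

    #include <cstdio>

    int main()
    {
        // SSE2: pshufd   xmm1, xmm2, 1   66 0F 70 /r ib     -> 3 opcode bytes + ModR/M + imm8
        // SSE4: insertps xmm1, xmm2, 1   66 0F 3A 21 /r ib  -> 4 opcode bytes + ModR/M + imm8
        // AVX : vinsertps (3-byte VEX)   C4 .. .. 21 /r ib  -> 3 VEX bytes + opcode + ModR/M + imm8
        printf("SSE2: %d, SSE4: %d, AVX: %d\n", 3 + 1 + 1, 4 + 1 + 1, 3 + 1 + 1 + 1);
        return 0;
    }
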
#ifdef _TARGET_AMD64_
@@ -4014,7 +3994,7 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f
id->idIns(ins);
id->idInsFmt(fmt);
- size_t code = insCodeMI(ins);
+ code_t code = insCodeMI(ins);
UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
#ifdef _TARGET_AMD64_
@@ -4387,22 +4367,7 @@ void emitter::emitIns_AR_R(
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
@@ -4443,22 +4408,7 @@ void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
@@ -4575,22 +4525,7 @@ void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regN
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
void emitter::emitIns_I_ARX(
@@ -4711,22 +4646,7 @@ void emitter::emitIns_ARX_R(
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
@@ -4842,22 +4762,7 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
/*****************************************************************************
@@ -4901,22 +4806,7 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
dispIns(id);
emitCurIGsize += sz;
-#if !FEATURE_FIXED_OUT_ARGS
-
- if (ins == INS_push)
- {
- emitCurStackLvl += emitCntStackDepth;
-
- if (emitMaxStackDepth < emitCurStackLvl)
- emitMaxStackDepth = emitCurStackLvl;
- }
- else if (ins == INS_pop)
- {
- emitCurStackLvl -= emitCntStackDepth;
- assert((int)emitCurStackLvl >= 0);
- }
-
-#endif // !FEATURE_FIXED_OUT_ARGS
+ emitAdjustStackDepthPushPop(ins);
}
void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
@@ -5197,8 +5087,23 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0
dispIns(id);
emitCurIGsize += sz;
+ emitAdjustStackDepthPushPop(ins);
+}
+
#if !FEATURE_FIXED_OUT_ARGS
+//------------------------------------------------------------------------
+// emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth.
+//
+// Arguments:
+// ins - the instruction. Only INS_push and INS_pop adjust the stack depth.
+//
+// Notes:
+// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
+// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
+//
+void emitter::emitAdjustStackDepthPushPop(instruction ins)
+{
if (ins == INS_push)
{
emitCurStackLvl += emitCntStackDepth;
@@ -5206,10 +5111,53 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0
if (emitMaxStackDepth < emitCurStackLvl)
emitMaxStackDepth = emitCurStackLvl;
}
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+}
-#endif // !FEATURE_FIXED_OUT_ARGS
+//------------------------------------------------------------------------
+// emitAdjustStackDepth: Adjust the current and maximum stack depth.
+//
+// Arguments:
+// ins - the instruction. Only INS_add and INS_sub adjust the stack depth.
+// It is assumed that the add/sub is on the stack pointer.
+// val - the number of bytes to add to or subtract from the stack pointer.
+//
+// Notes:
+// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth.
+// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere)
+//
+void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
+{
+ // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return.
+ if (emitCntStackDepth == 0)
+ return;
+
+ if (ins == INS_sub)
+ {
+ S_UINT32 newStackLvl(emitCurStackLvl);
+ newStackLvl += S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_add)
+ {
+ S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+ }
}
+#endif // EMIT_TRACK_STACK_DEPTH
+
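A standalone sketch of the bookkeeping the two helpers above perform, assuming a 4-byte DWORD per push outside the prolog/epilog; the names are stand-ins for emitCurStackLvl, emitMaxStackDepth, and emitCntStackDepth, and this is not JIT code.

    #include <cassert>
    #include <cstdio>

    static unsigned curLvl = 0, maxLvl = 0;
    static unsigned cntDepth = 4; // assumed: one DWORD per push when not in a prolog/epilog

    static void push() { curLvl += cntDepth; if (maxLvl < curLvl) maxLvl = curLvl; }
    static void pop()  { assert(curLvl >= cntDepth); curLvl -= cntDepth; }
    static void subSP(unsigned v) { curLvl += v; if (maxLvl < curLvl) maxLvl = curLvl; }
    static void addSP(unsigned v) { assert(curLvl >= v); curLvl -= v; }

    int main()
    {
        push(); push(); subSP(8); addSP(8); pop(); pop();
        printf("max tracked stack level: %u bytes\n", maxLvl); // 16
        return 0;
    }
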
/*****************************************************************************
*
* Add a call instruction (direct or indirect).
@@ -5393,13 +5341,11 @@ void emitter::emitIns_Call(EmitCallType callType,
assert(argSize % sizeof(void*) == 0);
argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide
-#ifdef DEBUGGING_SUPPORT
/* Managed RetVal: emit sequence point for the call */
if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
{
codeGen->genIPmappingAdd(ilOffset, false);
}
-#endif
/*
We need to allocate the appropriate instruction descriptor based
@@ -5793,9 +5739,18 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
return emitXMMregName(reg);
case EA_8BYTE:
+ if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
+ {
+ return emitXMMregName(reg);
+ }
break;
case EA_4BYTE:
+ if ((REG_XMM0 <= reg) && (reg <= REG_XMM15))
+ {
+ return emitXMMregName(reg);
+ }
+
if (reg > REG_R15)
{
break;
@@ -5880,10 +5835,24 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
case EA_16BYTE:
return emitXMMregName(reg);
-#endif // LEGACY_BACKEND
+ case EA_8BYTE:
+ if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
+ {
+ return emitXMMregName(reg);
+ }
+ break;
+
+ case EA_4BYTE:
+ if ((REG_XMM0 <= reg) && (reg <= REG_XMM7))
+ {
+ return emitXMMregName(reg);
+ }
+ break;
+#else // LEGACY_BACKEND
case EA_4BYTE:
break;
+#endif // LEGACY_BACKEND
case EA_2BYTE:
rn++;
@@ -6661,9 +6630,9 @@ void emitter::emitDispIns(
printf(" %-9s", sstr);
}
#ifndef FEATURE_PAL
- if (strnlen_s(sstr, 10) > 8)
+ if (strnlen_s(sstr, 10) >= 8)
#else // FEATURE_PAL
- if (strnlen(sstr, 10) > 8)
+ if (strnlen(sstr, 10) >= 8)
#endif // FEATURE_PAL
{
printf(" ");
@@ -6808,17 +6777,8 @@ void emitter::emitDispIns(
case IF_RRD_ARD:
case IF_RWR_ARD:
case IF_RRW_ARD:
- if (IsAVXInstruction(ins))
- {
- printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
- }
- else if (IsSSE2Instruction(ins))
- {
- printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
- }
- else
#ifdef _TARGET_AMD64_
- if (ins == INS_movsxd)
+ if (ins == INS_movsxd)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
}
@@ -6841,18 +6801,7 @@ void emitter::emitDispIns(
printf("%s", sstr);
emitDispAddrMode(id);
- if (IsAVXInstruction(ins))
- {
- printf(", %s", emitYMMregName((unsigned)id->idReg1()));
- }
- else if (IsSSE2Instruction(ins))
- {
- printf(", %s", emitXMMregName((unsigned)id->idReg1()));
- }
- else
- {
- printf(", %s", emitRegName(id->idReg1(), attr));
- }
+ printf(", %s", emitRegName(id->idReg1(), attr));
break;
case IF_ARD_CNS:
@@ -6930,18 +6879,7 @@ void emitter::emitDispIns(
emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
- if (IsAVXInstruction(ins))
- {
- printf(", %s", emitYMMregName((unsigned)id->idReg1()));
- }
- else if (IsSSE2Instruction(ins))
- {
- printf(", %s", emitXMMregName((unsigned)id->idReg1()));
- }
- else
- {
- printf(", %s", emitRegName(id->idReg1(), attr));
- }
+ printf(", %s", emitRegName(id->idReg1(), attr));
break;
case IF_SRD_CNS:
@@ -6983,17 +6921,8 @@ void emitter::emitDispIns(
case IF_RRD_SRD:
case IF_RWR_SRD:
case IF_RRW_SRD:
- if (IsAVXInstruction(ins))
- {
- printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
- }
- else if (IsSSE2Instruction(ins))
- {
- printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
- }
- else
#ifdef _TARGET_AMD64_
- if (ins == INS_movsxd)
+ if (ins == INS_movsxd)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
}
@@ -7016,36 +6945,31 @@ void emitter::emitDispIns(
case IF_RRD_RRD:
case IF_RWR_RRD:
case IF_RRW_RRD:
-
if (ins == INS_mov_i2xmm)
{
- printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
}
else if (ins == INS_mov_xmm2i)
{
- printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1()));
+ printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE));
+ }
+ else if (ins == INS_pmovmskb)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
}
#ifndef LEGACY_BACKEND
else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
{
- printf(" %s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
}
#endif
else if ((ins == INS_cvttsd2si)
#ifndef LEGACY_BACKEND
|| (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)
#endif
- )
- {
- printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2()));
- }
- else if (IsAVXInstruction(ins))
- {
- printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2()));
- }
- else if (IsSSE2Instruction(ins))
+ || 0)
{
- printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2()));
+ printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
}
#ifdef _TARGET_AMD64_
else if (ins == INS_movsxd)
@@ -7079,16 +7003,8 @@ void emitter::emitDispIns(
break;
#endif
case IF_RRW_RRW_CNS:
- if (IsAVXInstruction(ins))
- {
- printf("%s,", emitYMMregName((unsigned)id->idReg1()), attr);
- printf(" %s", emitYMMregName((unsigned)id->idReg2()), attr);
- }
- else
- {
- printf("%s,", emitRegName(id->idReg1(), attr));
- printf(" %s", emitRegName(id->idReg2(), attr));
- }
+ printf("%s,", emitRegName(id->idReg1(), attr));
+ printf(" %s", emitRegName(id->idReg2(), attr));
val = emitGetInsSC(id);
#ifdef _TARGET_AMD64_
// no 8-byte immediates allowed here!
@@ -7133,18 +7049,7 @@ void emitter::emitDispIns(
attr = EA_PTRSIZE;
}
#endif
- if (IsAVXInstruction(ins))
- {
- printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
- }
- else if (IsSSE2Instruction(ins))
- {
- printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
- }
- else
- {
- printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
- }
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
offs = emitGetInsDsp(id);
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
break;
@@ -7521,7 +7426,7 @@ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
* Output an instruction involving an address mode.
*/
-BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
regNumber reg;
regNumber rgx;
@@ -7543,7 +7448,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
// Special case: call via a register
if (id->idIsCallRegPtr())
{
- size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
+ code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
dst += emitOutputWord(dst, opcode);
@@ -7559,13 +7464,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
if (IsExtendedReg(reg, EA_PTRSIZE))
{
insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
- reg = RegEncoding(reg);
+ // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
+ reg = (regNumber)RegEncoding(reg);
}
if (IsExtendedReg(rgx, EA_PTRSIZE))
{
insEncodeRegSIB(ins, rgx, &code);
- rgx = RegEncoding(rgx);
+ // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
+ rgx = (regNumber)RegEncoding(rgx);
}
// And emit the REX prefix
@@ -7605,7 +7512,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
// For this format, moves do not support a third operand, so we only need to handle the binary ops.
if (IsThreeOperandBinaryAVXInstruction(ins))
{
- // Encode source operand reg in 'vvvv' bits in 1's compliement form
+ // Encode source operand reg in 'vvvv' bits in 1's complement form
// The order of operands are reversed, therefore use reg2 as the source.
code = insEncodeReg3456(ins, id->idReg1(), size, code);
}
@@ -7619,13 +7526,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
if (IsExtendedReg(reg, EA_PTRSIZE))
{
insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
- reg = RegEncoding(reg);
+ // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
+ reg = (regNumber)RegEncoding(reg);
}
if (IsExtendedReg(rgx, EA_PTRSIZE))
{
insEncodeRegSIB(ins, rgx, &code);
- rgx = RegEncoding(rgx);
+ // TODO-Cleanup: stop casting RegEncoding() back to a regNumber.
+ rgx = (regNumber)RegEncoding(rgx);
}
// Is this a 'big' opcode?
@@ -8185,7 +8094,7 @@ DONE:
* Output an instruction involving a stack frame value.
*/
-BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
int adr;
int dsp;
@@ -8234,7 +8143,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
// Special case emitting AVX instructions
if (Is4ByteAVXInstruction(ins))
{
- size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// Emit last opcode byte
@@ -8581,7 +8490,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
* Output an instruction with a static data member (class variable).
*/
-BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
BYTE* addr;
CORINFO_FIELD_HANDLE fldh;
@@ -8646,20 +8555,18 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
{
case IF_RWR_MRD:
- assert((unsigned)code ==
- (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+ assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
- code &= ~((size_t)0xFFFFFFFF);
+ code &= ~((code_t)0xFFFFFFFF);
code |= 0xA0;
isMoffset = true;
break;
case IF_MWR_RRD:
- assert((unsigned)code ==
- (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+ assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
- code &= ~((size_t)0xFFFFFFFF);
+ code &= ~((code_t)0xFFFFFFFF);
code |= 0xA2;
isMoffset = true;
break;
@@ -8674,7 +8581,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
// Special case emitting AVX instructions
if (Is4ByteAVXInstruction(ins))
{
- size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
// Emit last opcode byte
@@ -9017,7 +8924,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
{
- size_t code;
+ code_t code;
instruction ins = id->idIns();
regNumber reg = id->idReg1();
@@ -9228,7 +9135,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
{
- size_t code;
+ code_t code;
instruction ins = id->idIns();
regNumber reg1 = id->idReg1();
@@ -9238,7 +9145,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// Get the 'base' opcode
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
- if (IsSSE2Instruction(ins) || IsAVXInstruction(ins))
+ if (IsSSEOrAVXInstruction(ins))
{
code = insEncodeRMreg(ins, code);
@@ -9322,12 +9229,12 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// now we use the single source as source1 and source2.
if (IsThreeOperandBinaryAVXInstruction(ins))
{
- // encode source/dest operand reg in 'vvvv' bits in 1's compliement form
+ // encode source/dest operand reg in 'vvvv' bits in 1's complement form
code = insEncodeReg3456(ins, reg1, size, code);
}
else if (IsThreeOperandMoveAVXInstruction(ins))
{
- // encode source operand reg in 'vvvv' bits in 1's compliement form
+ // encode source operand reg in 'vvvv' bits in 1's complement form
code = insEncodeReg3456(ins, reg2, size, code);
}
@@ -9340,6 +9247,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// Output the highest word of the opcode
dst += emitOutputWord(dst, code >> 16);
code &= 0x0000FFFF;
+
+ if (Is4ByteSSE4Instruction(ins))
+ {
+ // Output 3rd byte of the opcode
+ dst += emitOutputByte(dst, code);
+ code &= 0xFF00;
+ }
}
else if (code & 0x00FF0000)
{
@@ -9349,13 +9263,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
// If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
// encoding. Otherwise, this is an instruction with a 4-byte encoding,
- // and the MOd/RM encoding needs to go in the 5th byte.
+ // and the Mod/RM encoding needs to go in the 5th byte.
// TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
// We probably need a different mechanism to identify the 4-byte encodings.
if ((code & 0xFF) == 0x00)
{
- // This case happens for AVX instructions only
- assert(IsAVXInstruction(ins));
+ // This case happens for SSE4/AVX instructions only
+ assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins));
if ((code & 0xFF00) == 0xC000)
{
dst += emitOutputByte(dst, (0xC0 | regCode));
@@ -9560,7 +9474,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
#ifdef FEATURE_AVX_SUPPORT
BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
{
- size_t code;
+ code_t code;
instruction ins = id->idIns();
assert(IsAVXInstruction(ins));
@@ -9642,7 +9556,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
{
- size_t code;
+ code_t code;
emitAttr size = id->idOpSize();
instruction ins = id->idIns();
regNumber reg = id->idReg1();
@@ -10004,7 +9918,7 @@ DONE:
BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
{
- size_t code;
+ code_t code;
instruction ins = id->idIns();
emitAttr size = id->idOpSize();
ssize_t val = emitGetInsSC(id);
@@ -10286,27 +10200,29 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
}
else
{
- size_t code;
+ code_t code;
// Long jump
if (jmp)
{
+ // clang-format off
assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
- assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
- assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
+ assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
+ assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
- assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
+ assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
- assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
- assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
+ assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
+ assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
- assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
+ assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
- assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
+ assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
+ // clang-format on
code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
}
@@ -10452,10 +10368,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// What instruction format have we got?
switch (id->idInsFmt())
{
- size_t code;
- size_t regcode;
- int args;
- CnsVal cnsVal;
+ code_t code;
+ unsigned regcode;
+ int args;
+ CnsVal cnsVal;
BYTE* addr;
bool recCall;
@@ -10792,6 +10708,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutputWord(dst, code);
dst += emitOutputByte(dst, emitGetInsSC(id));
sz = emitSizeOfInsDsc(id);
+
+ // Update GC info.
+ assert(!id->idGCref());
+ emitGCregDeadUpd(id->idReg1(), dst);
break;
case IF_RRD_RRD:
@@ -10871,7 +10791,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
// Output the REX prefix
dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
- if (UseAVX() && Is4ByteAVXInstruction(ins))
+ if (Is4ByteAVXInstruction(ins))
{
// We just need to output the last byte of the opcode.
assert((code & 0xFF) == 0);
@@ -10883,6 +10803,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
dst += emitOutputWord(dst, code >> 16);
code &= 0x0000FFFF;
+
+ if (Is4ByteSSE4Instruction(ins))
+ {
+ dst += emitOutputWord(dst, code);
+ code = 0;
+ }
}
else if (code & 0x00FF0000)
{
@@ -10898,9 +10824,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
else
{
- // This case occurs for AVX instructions.
+ // This case occurs for SSE4/AVX instructions.
// Note that regcode is left shifted by 8-bits.
- assert(Is4ByteAVXInstruction(ins));
+ assert(Is4ByteAVXInstruction(ins) || Is4ByteSSE4Instruction(ins));
dst += emitOutputByte(dst, 0xC0 | (regcode >> 8));
}
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index dfd7e6ec50..98256cdaa7 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -28,6 +28,15 @@ inline static bool isDoubleReg(regNumber reg)
/* Routines that compute the size of / encode instructions */
/************************************************************************/
+// code_t is a type used to accumulate bits of opcode + prefixes. On amd64, it must be 64 bits
+// to support the REX prefixes. On both x86 and amd64, it must be 64 bits to support AVX, with
+// its 3-byte VEX prefix. For legacy backend (which doesn't support AVX), leave it as size_t.
+#if defined(LEGACY_BACKEND)
+typedef size_t code_t;
+#else // !defined(LEGACY_BACKEND)
+typedef unsigned __int64 code_t;
+#endif // !defined(LEGACY_BACKEND)
+
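A compile-time sanity sketch of the comment above: both the 3-byte VEX prefix and the REX byte live above bit 32 of the accumulated code, so a 32-bit accumulator cannot hold them. Illustration only.

    #include <cstdint>

    static_assert(0xC4E07800000000ULL > UINT32_MAX, "3-byte VEX prefix needs a 64-bit code_t");
    static_assert(0xFF00000000ULL > UINT32_MAX, "REX byte (REX_PREFIX_MASK) needs a 64-bit code_t");

    int main() { return 0; }
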
struct CnsVal
{
ssize_t cnsVal;
@@ -36,19 +45,19 @@ struct CnsVal
#endif
};
-UNATIVE_OFFSET emitInsSize(size_t code);
+UNATIVE_OFFSET emitInsSize(code_t code);
UNATIVE_OFFSET emitInsSizeRM(instruction ins);
-UNATIVE_OFFSET emitInsSizeSV(size_t code, int var, int dsp);
+UNATIVE_OFFSET emitInsSizeSV(code_t code, int var, int dsp);
UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, int var, int dsp, int val);
UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr);
-UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code);
-UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code, int val);
-UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code);
-UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code, int val);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code, int val);
-BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
-BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
-BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr);
BYTE* emitOutputR(BYTE* dst, instrDesc* id);
BYTE* emitOutputRI(BYTE* dst, instrDesc* id);
@@ -61,42 +70,60 @@ BYTE* emitOutputRRR(BYTE* dst, instrDesc* id);
BYTE* emitOutputLJ(BYTE* dst, instrDesc* id);
-unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code);
+unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code);
unsigned emitGetRexPrefixSize(instruction ins);
unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr);
-unsigned emitGetPrefixSize(size_t code);
-unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code);
+unsigned emitGetPrefixSize(code_t code);
+unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code);
+
+unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code);
+unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code);
+code_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code);
+unsigned insEncodeRegSIB(instruction ins, regNumber reg, code_t* code);
-unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code);
-unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code);
-size_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code);
-unsigned insEncodeRegSIB(instruction ins, regNumber reg, size_t* code);
+code_t insEncodeMRreg(instruction ins, code_t code);
+code_t insEncodeRMreg(instruction ins, code_t code);
+code_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code);
+code_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size);
+code_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size);
-size_t insEncodeMRreg(instruction ins, size_t code);
-size_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code);
-size_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size);
-size_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size);
+unsigned insSSval(unsigned scale);
bool IsAVXInstruction(instruction ins);
-size_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code);
+code_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code);
-size_t AddRexWPrefix(instruction ins, size_t code);
-size_t AddRexRPrefix(instruction ins, size_t code);
-size_t AddRexXPrefix(instruction ins, size_t code);
-size_t AddRexBPrefix(instruction ins, size_t code);
-size_t AddRexPrefix(instruction ins, size_t code);
+code_t AddRexWPrefix(instruction ins, code_t code);
+code_t AddRexRPrefix(instruction ins, code_t code);
+code_t AddRexXPrefix(instruction ins, code_t code);
+code_t AddRexBPrefix(instruction ins, code_t code);
+code_t AddRexPrefix(instruction ins, code_t code);
+
+bool useSSE3_4Encodings;
+bool UseSSE3_4()
+{
+ return useSSE3_4Encodings;
+}
+void SetUseSSE3_4(bool value)
+{
+ useSSE3_4Encodings = value;
+}
+bool Is4ByteSSE4Instruction(instruction ins);
#ifdef FEATURE_AVX_SUPPORT
+
// 3-byte VEX prefix starts with byte 0xC4
-#define VEX_PREFIX_MASK_3BYTE 0xC4000000000000LL
+#define VEX_PREFIX_MASK_3BYTE 0xFF000000000000ULL
+#define VEX_PREFIX_CODE_3BYTE 0xC4000000000000ULL
+
bool TakesVexPrefix(instruction ins);
+
// Returns true if the instruction encoding already contains VEX prefix
-bool hasVexPrefix(size_t code)
+bool hasVexPrefix(code_t code)
{
- return (code & VEX_PREFIX_MASK_3BYTE) != 0;
+ return (code & VEX_PREFIX_MASK_3BYTE) == VEX_PREFIX_CODE_3BYTE;
}
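A small contrast of the old and new predicates, using a hypothetical stray value purely to show the difference: the old test fired on any overlap with 0xC4 in that byte, while the new one requires the full 0xC4 VEX escape byte. Illustration only; the value below is made up.

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint64_t stray = 0x40000000000000ULL; // hypothetical non-VEX value with one bit set in byte 6
        bool oldCheck  = (stray & 0xC4000000000000ULL) != 0;                   // true
        bool newCheck  = (stray & 0xFF000000000000ULL) == 0xC4000000000000ULL; // false
        printf("old: %d, new: %d\n", (int)oldCheck, (int)newCheck);
        return 0;
    }
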
-size_t AddVexPrefix(instruction ins, size_t code, emitAttr attr);
-size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size)
+code_t AddVexPrefix(instruction ins, code_t code, emitAttr attr);
+code_t AddVexPrefixIfNeeded(instruction ins, code_t code, emitAttr size)
{
if (TakesVexPrefix(ins))
{
@@ -104,7 +131,7 @@ size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size)
}
return code;
}
-size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr size)
{
if (TakesVexPrefix(ins) && !hasVexPrefix(code))
{
@@ -112,6 +139,7 @@ size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr
}
return code;
}
+
bool useAVXEncodings;
bool UseAVX()
{
@@ -121,18 +149,20 @@ void SetUseAVX(bool value)
{
useAVXEncodings = value;
}
+
bool IsThreeOperandBinaryAVXInstruction(instruction ins);
bool IsThreeOperandMoveAVXInstruction(instruction ins);
bool IsThreeOperandAVXInstruction(instruction ins)
{
return (IsThreeOperandBinaryAVXInstruction(ins) || IsThreeOperandMoveAVXInstruction(ins));
}
+bool Is4ByteAVXInstruction(instruction ins);
#else // !FEATURE_AVX_SUPPORT
-bool UseAVX()
+bool UseAVX()
{
return false;
}
-bool hasVexPrefix(size_t code)
+bool hasVexPrefix(code_t code)
{
return false;
}
@@ -148,15 +178,19 @@ bool IsThreeOperandAVXInstruction(instruction ins)
{
return false;
}
+bool Is4ByteAVXInstruction(instruction ins)
+{
+ return false;
+}
bool TakesVexPrefix(instruction ins)
{
return false;
}
-size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr attr)
+code_t AddVexPrefixIfNeeded(instruction ins, code_t code, emitAttr attr)
{
return code;
}
-size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr size)
{
return code;
}
@@ -226,6 +260,18 @@ bool emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumb
bool emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id);
+#if FEATURE_FIXED_OUT_ARGS
+void emitAdjustStackDepthPushPop(instruction ins)
+{
+}
+void emitAdjustStackDepth(instruction ins, ssize_t val)
+{
+}
+#else // !FEATURE_FIXED_OUT_ARGS
+void emitAdjustStackDepthPushPop(instruction ins);
+void emitAdjustStackDepth(instruction ins, ssize_t val);
+#endif // !FEATURE_FIXED_OUT_ARGS
+
/*****************************************************************************
*
* Convert between an index scale in bytes to a smaller encoding used for
diff --git a/src/jit/error.cpp b/src/jit/error.cpp
index 71c3301045..f42dcef5c6 100644
--- a/src/jit/error.cpp
+++ b/src/jit/error.cpp
@@ -129,7 +129,7 @@ void noWayAssertBodyConditional(
}
}
-#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+#if defined(ALT_JIT) && (!defined(_TARGET_X86_) || !defined(LEGACY_BACKEND))
/*****************************************************************************/
void notYetImplemented(const char* msg, const char* filename, unsigned line)
@@ -193,7 +193,7 @@ void notYetImplemented(const char* msg, const char* filename, unsigned line)
}
}
-#endif // #if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+#endif // #if defined(ALT_JIT) && (!defined(_TARGET_X86_) || !defined(LEGACY_BACKEND))
/*****************************************************************************/
LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
diff --git a/src/jit/error.h b/src/jit/error.h
index c56971aaf7..0535601055 100644
--- a/src/jit/error.h
+++ b/src/jit/error.h
@@ -58,10 +58,11 @@ extern LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
/*****************************************************************************/
+// clang-format off
+
extern void debugError(const char* msg, const char* file, unsigned line);
extern void DECLSPEC_NORETURN badCode();
-extern void DECLSPEC_NORETURN
-badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line);
+extern void DECLSPEC_NORETURN badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line);
extern void DECLSPEC_NORETURN noWay();
extern void DECLSPEC_NORETURN NOMEM();
extern void DECLSPEC_NORETURN fatal(int errCode);
@@ -79,120 +80,6 @@ extern void noWayAssertBodyConditional(
);
extern void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line);
-#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
-
-// This guy can return based on Config flag/Debugger
-extern void notYetImplemented(const char* msg, const char* file, unsigned line);
-#define NYI(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
-#define NYI_IF(cond, msg) \
- if (cond) \
- notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
-
-#ifdef _TARGET_AMD64_
-
-#define NYI_AMD64(msg) notYetImplemented("NYI_AMD64: " #msg, __FILE__, __LINE__)
-#define NYI_X86(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM64(msg) \
- do \
- { \
- } while (0)
-
-#elif defined(_TARGET_X86_)
-
-#define NYI_AMD64(msg) \
- do \
- { \
- } while (0)
-#define NYI_X86(msg) notYetImplemented("NYI_X86: " #msg, __FILE__, __LINE__)
-#define NYI_ARM(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM64(msg) \
- do \
- { \
- } while (0)
-
-#elif defined(_TARGET_ARM_)
-
-#define NYI_AMD64(msg) \
- do \
- { \
- } while (0)
-#define NYI_X86(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM(msg) notYetImplemented("NYI_ARM: " #msg, __FILE__, __LINE__)
-#define NYI_ARM64(msg) \
- do \
- { \
- } while (0)
-
-#elif defined(_TARGET_ARM64_)
-
-#define NYI_AMD64(msg) \
- do \
- { \
- } while (0)
-#define NYI_X86(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM64(msg) notYetImplemented("NYI_ARM64: " #msg, __FILE__, __LINE__)
-
-#else
-
-#error "Unknown platform, not x86, ARM, or AMD64?"
-
-#endif
-
-#else // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
-
-#define NYI(msg) assert(!msg)
-#define NYI_AMD64(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM(msg) \
- do \
- { \
- } while (0)
-#define NYI_ARM64(msg) \
- do \
- { \
- } while (0)
-
-#endif // _TARGET_X86_
-
-#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87)
-#define NYI_FLAT_FP_X87(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
-#define NYI_FLAT_FP_X87_NC(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
-
-#else
-
-#define NYI_FLAT_FP_X87(msg) \
- do \
- { \
- } while (0)
-#define NYI_FLAT_FP_X87_NC(msg) \
- do \
- { \
- } while (0)
-
-#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87
-
#ifdef DEBUG
#define NO_WAY(msg) (debugError(msg, __FILE__, __LINE__), noWay())
// Used for fallback stress mode
@@ -210,6 +97,8 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line);
} while (0)
#define unreached() noWayAssertBody("unreached", __FILE__, __LINE__)
+#define NOWAY_MSG(msg) noWayAssertBodyConditional(msg, __FILE__, __LINE__)
+
#else
#define NO_WAY(msg) noWay()
@@ -232,6 +121,8 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line);
} while (0)
#define unreached() noWayAssertBody()
+#define NOWAY_MSG(msg) noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS)
+
#endif
// IMPL_LIMITATION is called when we encounter valid IL that is not
@@ -239,7 +130,81 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line);
// limitations (that could be removed in the future)
#define IMPL_LIMITATION(msg) NO_WAY(msg)
-#if defined(_HOST_X86_)
+#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+#if defined(ALT_JIT)
+
+// This guy can return based on Config flag/Debugger
+extern void notYetImplemented(const char* msg, const char* file, unsigned line);
+#define NYIRAW(msg) notYetImplemented(msg, __FILE__, __LINE__)
+
+#else // !defined(ALT_JIT)
+
+#define NYIRAW(msg) NOWAY_MSG(msg)
+
+#endif // !defined(ALT_JIT)
+
+#define NYI(msg) NYIRAW("NYI: " msg)
+#define NYI_IF(cond, msg) if (cond) NYIRAW("NYI: " msg)
+
+#ifdef _TARGET_AMD64_
+
+#define NYI_AMD64(msg) NYIRAW("NYI_AMD64: " msg)
+#define NYI_X86(msg) do { } while (0)
+#define NYI_ARM(msg) do { } while (0)
+#define NYI_ARM64(msg) do { } while (0)
+
+#elif defined(_TARGET_X86_)
+
+#define NYI_AMD64(msg) do { } while (0)
+#define NYI_X86(msg) NYIRAW("NYI_X86: " msg)
+#define NYI_ARM(msg) do { } while (0)
+#define NYI_ARM64(msg) do { } while (0)
+
+#elif defined(_TARGET_ARM_)
+
+#define NYI_AMD64(msg) do { } while (0)
+#define NYI_X86(msg) do { } while (0)
+#define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg)
+#define NYI_ARM64(msg) do { } while (0)
+
+#elif defined(_TARGET_ARM64_)
+
+#define NYI_AMD64(msg) do { } while (0)
+#define NYI_X86(msg) do { } while (0)
+#define NYI_ARM(msg) do { } while (0)
+#define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg)
+
+#else
+
+#error "Unknown platform, not x86, ARM, or AMD64?"
+
+#endif
+
+#else // NYI not available; make it an assert.
+
+#define NYI(msg) assert(!msg)
+#define NYI_AMD64(msg) do { } while (0)
+#define NYI_ARM(msg) do { } while (0)
+#define NYI_ARM64(msg) do { } while (0)
+
+#endif // NYI not available
+
+#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87)
+
+#define NYI_FLAT_FP_X87(msg) NYI(msg)
+#define NYI_FLAT_FP_X87_NC(msg) NYI(msg)
+
+#else
+
+#define NYI_FLAT_FP_X87(msg) do { } while (0)
+#define NYI_FLAT_FP_X87_NC(msg) do { } while (0)
+
+#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87
+
+// clang-format on
+
+#if defined(_HOST_X86_) && !defined(FEATURE_PAL)
// While debugging in an Debugger, the "int 3" will cause the program to break
// Outside, the exception handler will just filter out the "int 3".
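
A condensed sketch of the NYIRAW selection the error.h hunk introduces: under an ALT_JIT-style build the macro routes to a function that can log and keep going, otherwise it degrades to a hard failure. notYetImplementedStub, MY_ALT_JIT, and the assert fallback are stand-ins for the real notYetImplemented/ALT_JIT/NOWAY_MSG plumbing.

#include <cassert>
#include <cstdio>

// Stand-in for notYetImplemented(): logs and may return, unlike an assert.
static void notYetImplementedStub(const char* msg, const char* file, unsigned line)
{
    std::fprintf(stderr, "%s:%u: %s\n", file, line, msg);
}

#define MY_ALT_JIT 1 // hypothetical stand-in for the real ALT_JIT define

#if MY_ALT_JIT
#define MY_NYIRAW(msg) notYetImplementedStub(msg, __FILE__, __LINE__)
#else
#define MY_NYIRAW(msg) assert(!"noway: " msg) // degrade to a hard failure, like NOWAY_MSG
#endif

#define MY_NYI(msg) MY_NYIRAW("NYI: " msg)

int main()
{
    MY_NYI("contained GC pointers"); // under MY_ALT_JIT this logs and execution continues
    return 0;
}
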
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 1c68bfd96a..441569c339 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -2985,8 +2985,8 @@ void Compiler::fgRemovePreds()
{
C_ASSERT(offsetof(BasicBlock, bbPreds) ==
offsetof(BasicBlock, bbCheapPreds)); // bbPreds and bbCheapPreds are at the same place in a union,
- C_ASSERT(sizeof(((BasicBlock*)0)->bbPreds) ==
- sizeof(((BasicBlock*)0)->bbCheapPreds)); // and are the same size. So, this function removes both.
+ C_ASSERT(sizeof(((BasicBlock*)nullptr)->bbPreds) ==
+ sizeof(((BasicBlock*)nullptr)->bbCheapPreds)); // and are the same size. So, this function removes both.
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
{
@@ -3890,8 +3890,7 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
BBjumpKinds oldJumpKind = top->bbJumpKind;
// Update block flags
- unsigned originalFlags;
- originalFlags = top->bbFlags | BBF_GC_SAFE_POINT;
+ const unsigned __int64 originalFlags = top->bbFlags | BBF_GC_SAFE_POINT;
// Unlike Fei's inliner from puclr, I'm allowed to split loops.
// And we keep a few other flags...
@@ -4269,6 +4268,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
+ unsigned retBlocks = 0;
if (makeInlineObservations)
{
@@ -4638,6 +4638,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
break;
case CEE_JMP:
+ retBlocks++;
#if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
if (!isInlining)
@@ -4730,6 +4731,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
fgObserveInlineConstants(opcode, pushedStack, isInlining);
}
break;
+ case CEE_RET:
+ retBlocks++;
default:
break;
@@ -4758,6 +4761,27 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
{
compInlineResult->Note(InlineObservation::CALLEE_END_OPCODE_SCAN);
+ if (!compInlineResult->UsesLegacyPolicy())
+ {
+            // If there are no return blocks we know the callee does not return. If there
+            // are return blocks we cannot be sure it returns, since the count may include
+            // unreachable code. Either way, we still make the CALLEE_DOES_NOT_RETURN observation.
+
+ compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, retBlocks == 0);
+
+ if (retBlocks == 0 && isInlining)
+ {
+ // Mark the call node as "no return" as it can impact caller's code quality.
+ impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
+ }
+ }
+
+ // Determine if call site is within a try.
+ if (isInlining && impInlineInfo->iciBlock->hasTryIndex())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_IN_TRY_REGION);
+ }
+
// If the inline is viable and discretionary, do the
// profitability screening.
if (compInlineResult->IsDiscretionaryCandidate())
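
The new retBlocks counter above is a single-pass opcode scan: only a count of zero is a definite fact (the callee cannot return), while a nonzero count may include unreachable code. A self-contained toy version of that scan, with a made-up ILOp enum standing in for the real IL opcodes:

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified opcode set; Ret and Jmp model CEE_RET and CEE_JMP, the two ways IL leaves a method.
enum class ILOp : uint8_t { Nop, Call, Branch, Ret, Jmp };

static unsigned countReturnSites(const std::vector<ILOp>& body)
{
    unsigned retBlocks = 0;
    for (ILOp op : body)
    {
        if (op == ILOp::Ret || op == ILOp::Jmp)
        {
            retBlocks++;
        }
    }
    return retBlocks;
}

int main()
{
    std::vector<ILOp> throwsOnly = {ILOp::Call, ILOp::Branch}; // e.g. ends in a throw helper
    std::vector<ILOp> normal     = {ILOp::Call, ILOp::Ret};

    // retBlocks == 0 is the only case the prescan reports with certainty.
    std::cout << (countReturnSites(throwsOnly) == 0) << "\n"; // 1: callee does not return
    std::cout << (countReturnSites(normal) == 0) << "\n";     // 0: callee may return
    return 0;
}
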
@@ -5062,22 +5086,23 @@ void Compiler::fgLinkBasicBlocks()
/*****************************************************************************
*
- * Walk the instrs to create the basic blocks.
+ * Walk the instrs to create the basic blocks. Returns the number of BBJ_RETURN blocks in the method.
*/
-void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
+unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
{
+ unsigned retBlocks;
const BYTE* codeBegp = codeAddr;
const BYTE* codeEndp = codeAddr + codeSize;
bool tailCall = false;
unsigned curBBoffs;
BasicBlock* curBBdesc;
+ retBlocks = 0;
/* Clear the beginning offset for the first BB */
curBBoffs = 0;
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgCode && (info.compVarScopesCount > 0))
{
compResetScopeLists();
@@ -5090,7 +5115,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
{ /* do nothing */
}
}
-#endif
BBjumpKinds jmpKind;
@@ -5280,7 +5304,8 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
// TODO-CQ: We can inline some callees with explicit tail calls if we can guarantee that the calls
// can be dispatched as tail calls from the caller.
compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
- return;
+ retBlocks++;
+ return retBlocks;
}
__fallthrough;
@@ -5391,6 +5416,7 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
But instead of directly returning to the caller we jump and
execute something else in between */
case CEE_RET:
+ retBlocks++;
jmpKind = BBJ_RETURN;
break;
@@ -5473,8 +5499,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
nxtBBoffs = (IL_OFFSET)(codeAddr - codeBegp);
-#ifdef DEBUGGING_SUPPORT
-
bool foundScope = false;
if (opts.compDbgCode && (info.compVarScopesCount > 0))
@@ -5488,7 +5512,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
foundScope = true;
}
}
-#endif
/* Do we have a jump? */
@@ -5505,7 +5528,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
bool makeBlock = (jumpTarget[nxtBBoffs] != JT_NONE);
-#ifdef DEBUGGING_SUPPORT
if (!makeBlock && foundScope)
{
makeBlock = true;
@@ -5516,7 +5538,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
}
#endif // DEBUG
}
-#endif // DEBUGGING_SUPPORT
if (!makeBlock)
{
@@ -5581,6 +5602,8 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
/* Finally link up the bbJumpDest of the blocks together */
fgLinkBasicBlocks();
+
+ return retBlocks;
}
/*****************************************************************************
@@ -5726,44 +5749,23 @@ void Compiler::fgFindBasicBlocks()
/* Now create the basic blocks */
- fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
+ unsigned retBlocks = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
if (compIsForInlining())
{
- if (compInlineResult->IsFailure())
- {
- return;
- }
-
- bool hasReturnBlocks = false;
- bool hasMoreThanOneReturnBlock = false;
- for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
- {
- if (block->bbJumpKind == BBJ_RETURN)
- {
- if (hasReturnBlocks)
- {
- hasMoreThanOneReturnBlock = true;
- break;
- }
-
- hasReturnBlocks = true;
- }
- }
-
- if (!hasReturnBlocks && !compInlineResult->UsesLegacyPolicy())
+#ifdef DEBUG
+        // If fgFindJumpTargets marked the call as "no return", there
+ // really should be no BBJ_RETURN blocks in the method.
+ //
+ // Note LegacyPolicy does not mark calls as no return, so if
+ // it's active, skip the check.
+ if (!compInlineResult->UsesLegacyPolicy())
{
- //
- // Mark the call node as "no return". The inliner might ignore CALLEE_DOES_NOT_RETURN and
- // fail inline for a different reasons. In that case we still want to make the "no return"
- // information available to the caller as it can impact caller's code quality.
- //
-
- impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
+ bool markedNoReturn = (impInlineInfo->iciCall->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0;
+ assert((markedNoReturn && (retBlocks == 0)) || (!markedNoReturn && (retBlocks >= 1)));
}
-
- compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, !hasReturnBlocks);
+#endif // DEBUG
if (compInlineResult->IsFailure())
{
@@ -5777,12 +5779,14 @@ void Compiler::fgFindBasicBlocks()
compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount;
info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount;
- if (info.compRetNativeType != TYP_VOID && hasMoreThanOneReturnBlock)
+ // Use a spill temp for the return value if there are multiple return blocks.
+ if ((info.compRetNativeType != TYP_VOID) && (retBlocks > 1))
{
// The lifetime of this var might expand multiple BBs. So it is a long lifetime compiler temp.
lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp"));
lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
}
+
return;
}
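
The fgFindBasicBlocks changes above correlate two facts, the GTF_CALL_M_DOES_NOT_RETURN flag set during the prescan and the number of BBJ_RETURN blocks actually built, and only grab a return-value spill temp when more than one return block exists. A small restatement of that decision over plain fields; InlineShape is invented for the sketch.

#include <cassert>

struct InlineShape // invented container for the two facts the hunk correlates
{
    bool     markedNoReturn; // the call was flagged "does not return" during the prescan
    unsigned retBlocks;      // BBJ_RETURN blocks actually created
};

static bool needsReturnSpillTemp(const InlineShape& s, bool returnsValue)
{
    // Prescan and block builder must agree: no-return <=> zero return blocks.
    assert((s.markedNoReturn && (s.retBlocks == 0)) || (!s.markedNoReturn && (s.retBlocks >= 1)));

    // Only multiple return blocks force a long-lived spill temp for the return value.
    return returnsValue && (s.retBlocks > 1);
}

int main()
{
    assert(!needsReturnSpillTemp({false, 1}, true)); // single return feeds the use site directly
    assert(needsReturnSpillTemp({false, 2}, true));  // multiple returns need the temp
    assert(!needsReturnSpillTemp({true, 0}, false)); // no-return callee, nothing to spill
    return 0;
}
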
@@ -6666,7 +6670,7 @@ void Compiler::fgImport()
impImport(fgFirstBB);
- if (!(opts.eeFlags & CORJIT_FLG_SKIP_VERIFICATION))
+ if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION))
{
CorInfoMethodRuntimeFlags verFlag;
verFlag = tiIsVerifiableCode ? CORINFO_FLG_VERIFIABLE : CORINFO_FLG_UNVERIFIABLE;
@@ -6936,7 +6940,7 @@ GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
if (opts.IsReadyToRun())
{
CORINFO_RESOLVED_TOKEN resolvedToken;
- ZeroMemory(&resolvedToken, sizeof(resolvedToken));
+ memset(&resolvedToken, 0, sizeof(resolvedToken));
resolvedToken.hClass = cls;
return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
@@ -8248,8 +8252,8 @@ void Compiler::fgAddInternal()
if (!varTypeIsFloating(info.compRetType))
{
lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this);
-#ifdef REG_FLOATRET
}
+#ifdef REG_FLOATRET
else
{
lvaTable[genReturnLocal].setPrefReg(REG_FLOATRET, this);
@@ -8301,7 +8305,7 @@ void Compiler::fgAddInternal()
CORINFO_JUST_MY_CODE_HANDLE* pDbgHandle = nullptr;
CORINFO_JUST_MY_CODE_HANDLE dbgHandle = nullptr;
- if (opts.compDbgCode && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
+ if (opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
dbgHandle = info.compCompHnd->getJustMyCodeHandle(info.compMethodHnd, &pDbgHandle);
}
@@ -8589,17 +8593,12 @@ GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs)
// The first good IL offset of a statement in the block, or BAD_IL_OFFSET if such an IL offset
// cannot be found.
//
-// If we are not built with DEBUGGING_SUPPORT or DEBUG, then always report BAD_IL_OFFSET,
-// since in that case statements don't contain an IL offset. The effect will be that split
-// blocks will lose their IL offset information.
-
IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block)
{
// This function searches for IL offsets in statement nodes, so it can't be used in LIR. We
// could have a similar function for LIR that searches for GT_IL_OFFSET nodes.
assert(!block->IsLIR());
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext)
{
assert(stmt->IsStatement());
@@ -8608,7 +8607,6 @@ IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block)
return jitGetILoffs(stmt->gtStmt.gtStmtILoffsx);
}
}
-#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
return BAD_IL_OFFSET;
}
@@ -8949,10 +8947,10 @@ void Compiler::fgSimpleLowering()
for (GenTreePtr tree = stmt->gtStmtList; tree; tree = tree->gtNext)
{
#else
- LIR::Range& range = LIR::AsRange(block);
- for (GenTree* tree : range)
+ LIR::Range& range = LIR::AsRange(block);
+ for (GenTree* tree : range)
+ {
{
- {
#endif
if (tree->gtOper == GT_ARR_LENGTH)
{
@@ -9000,7 +8998,7 @@ void Compiler::fgSimpleLowering()
add->gtNext = tree;
tree->gtPrev = add;
#else
- range.InsertAfter(arr, con, add);
+ range.InsertAfter(arr, con, add);
#endif
}
@@ -9339,6 +9337,7 @@ inline bool OperIsControlFlow(genTreeOps oper)
switch (oper)
{
case GT_JTRUE:
+ case GT_JCC:
case GT_SWITCH:
case GT_LABEL:
@@ -10019,10 +10018,10 @@ void Compiler::fgUnreachableBlock(BasicBlock* block)
/*****************************************************************************************************
*
- * Function called to remove or morph a GT_JTRUE statement when we jump to the same
+ * Function called to remove or morph a conditional jump when we jump to the same
 * block whether the condition is true or false.
*/
-void Compiler::fgRemoveJTrue(BasicBlock* block)
+void Compiler::fgRemoveConditionalJump(BasicBlock* block)
{
noway_assert(block->bbJumpKind == BBJ_COND && block->bbJumpDest == block->bbNext);
assert(compRationalIRForm == block->IsLIR());
@@ -10053,7 +10052,7 @@ void Compiler::fgRemoveJTrue(BasicBlock* block)
LIR::Range& blockRange = LIR::AsRange(block);
GenTree* test = blockRange.LastNode();
- assert(test->OperGet() == GT_JTRUE);
+ assert(test->OperIsConditionalJump());
bool isClosed;
unsigned sideEffects;
@@ -10109,7 +10108,7 @@ void Compiler::fgRemoveJTrue(BasicBlock* block)
{
test->gtStmtExpr = sideEffList;
- fgMorphBlockStmt(block, test DEBUGARG("fgRemoveJTrue"));
+ fgMorphBlockStmt(block, test DEBUGARG("fgRemoveConditionalJump"));
}
}
}
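
fgRemoveJTrue is renamed fgRemoveConditionalJump above because GT_JCC can now also end a BBJ_COND block; either way, when both edges reach the same block the jump is dropped but any side effects in the condition must survive. A toy version of that keep-the-side-effects rule; Expr and MiniBlock are made up for the sketch.

#include <cassert>
#include <memory>
#include <utility>

struct Expr
{
    bool hasSideEffects;
};

struct MiniBlock
{
    std::unique_ptr<Expr> conditionalJump; // last node, when the block ends in a cond jump
    std::unique_ptr<Expr> keptSideEffects; // what survives once the jump is removed
};

static void removeConditionalJump(MiniBlock& b)
{
    assert(b.conditionalJump != nullptr);
    if (b.conditionalJump->hasSideEffects)
    {
        // Keep the evaluation, drop only the control transfer.
        b.keptSideEffects = std::move(b.conditionalJump);
    }
    else
    {
        b.conditionalJump.reset(); // nothing worth keeping
    }
}

int main()
{
    MiniBlock noisy{std::make_unique<Expr>(Expr{true}), nullptr};
    MiniBlock pure{std::make_unique<Expr>(Expr{false}), nullptr};
    removeConditionalJump(noisy);
    removeConditionalJump(pure);
    assert(noisy.keptSideEffects != nullptr);
    assert(pure.keptSideEffects == nullptr && pure.conditionalJump == nullptr);
    return 0;
}
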
@@ -10545,7 +10544,7 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
// Make sure we are replacing "block" with "succBlock" in predBlock->bbJumpDest.
noway_assert(predBlock->bbJumpDest == block);
predBlock->bbJumpDest = succBlock;
- fgRemoveJTrue(predBlock);
+ fgRemoveConditionalJump(predBlock);
break;
}
@@ -10605,7 +10604,7 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
/* Check for branch to next block */
if (bPrev->bbJumpDest == bPrev->bbNext)
{
- fgRemoveJTrue(bPrev);
+ fgRemoveConditionalJump(bPrev);
}
break;
@@ -11031,10 +11030,10 @@ bool Compiler::fgExpandRarelyRunBlocks()
noway_assert(tmpbb->isBBCallAlwaysPair());
bPrevPrev = tmpbb;
#else
- if (tmpbb->bbJumpKind == BBJ_CALLFINALLY)
- {
- bPrevPrev = tmpbb;
- }
+ if (tmpbb->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ bPrevPrev = tmpbb;
+ }
#endif
}
@@ -11566,60 +11565,60 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r
#else // FEATURE_EH_FUNCLETS
- for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (XTnum == regionIndex)
{
- if (XTnum == regionIndex)
- {
- // Don't update our handler's Last info
- continue;
- }
+ // Don't update our handler's Last info
+ continue;
+ }
- if (HBtab->ebdTryLast == bLast)
+ if (HBtab->ebdTryLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different try region then we may need to update ebdTryLast
+ for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext)
{
- // If we moved a set of blocks that were at the end of
- // a different try region then we may need to update ebdTryLast
- for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext)
+ if (block == bPrev)
{
- if (block == bPrev)
- {
- fgSetTryEnd(HBtab, bPrev);
- break;
- }
- else if (block == HBtab->ebdTryLast->bbNext)
- {
- // bPrev does not come after the TryBeg
- break;
- }
+ fgSetTryEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdTryLast->bbNext)
+ {
+ // bPrev does not come after the TryBeg
+ break;
}
}
- if (HBtab->ebdHndLast == bLast)
+ }
+ if (HBtab->ebdHndLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different handler region then we must update ebdHndLast
+ for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext)
{
- // If we moved a set of blocks that were at the end of
- // a different handler region then we must update ebdHndLast
- for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext)
+ if (block == bPrev)
{
- if (block == bPrev)
- {
- fgSetHndEnd(HBtab, bPrev);
- break;
- }
- else if (block == HBtab->ebdHndLast->bbNext)
- {
- // bPrev does not come after the HndBeg
- break;
- }
+ fgSetHndEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdHndLast->bbNext)
+ {
+ // bPrev does not come after the HndBeg
+ break;
}
}
- } // end exception table iteration
+ }
+ } // end exception table iteration
- // We have decided to insert the block(s) after fgLastBlock
- fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+ // We have decided to insert the block(s) after fgLastBlock
+ fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
- // If bPrev falls through, we will insert a jump to block
- fgConnectFallThrough(bPrev, bStart);
+ // If bPrev falls through, we will insert a jump to block
+ fgConnectFallThrough(bPrev, bStart);
- // If bLast falls through, we will insert a jump to bNext
- fgConnectFallThrough(bLast, bNext);
+ // If bLast falls through, we will insert a jump to bNext
+ fgConnectFallThrough(bLast, bNext);
#endif // FEATURE_EH_FUNCLETS
@@ -12060,70 +12059,70 @@ void Compiler::fgCreateFunclets()
#else // !FEATURE_EH_FUNCLETS
- /*****************************************************************************
- *
- * Function called to relocate any and all EH regions.
- * Only entire consecutive EH regions will be moved and they will be kept together.
- * Except for the first block, the range can not have any blocks that jump into or out of the region.
- */
+/*****************************************************************************
+ *
+ * Function called to relocate any and all EH regions.
+ * Only entire consecutive EH regions will be moved and they will be kept together.
+ * Except for the first block, the range can not have any blocks that jump into or out of the region.
+ */
- bool Compiler::fgRelocateEHRegions()
- {
- bool result = false; // Our return value
+bool Compiler::fgRelocateEHRegions()
+{
+ bool result = false; // Our return value
#ifdef DEBUG
- if (verbose)
- printf("*************** In fgRelocateEHRegions()\n");
+ if (verbose)
+ printf("*************** In fgRelocateEHRegions()\n");
#endif
- if (fgCanRelocateEHRegions)
- {
- unsigned XTnum;
- EHblkDsc* HBtab;
+ if (fgCanRelocateEHRegions)
+ {
+ unsigned XTnum;
+ EHblkDsc* HBtab;
- for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Nested EH regions cannot be moved.
+ // Also we don't want to relocate an EH region that has a filter
+ if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter())
{
- // Nested EH regions cannot be moved.
- // Also we don't want to relocate an EH region that has a filter
- if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter())
- {
- bool movedTry = false;
+ bool movedTry = false;
#if DEBUG
- bool movedHnd = false;
+ bool movedHnd = false;
#endif // DEBUG
- // Only try to move the outermost try region
- if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ // Only try to move the outermost try region
+ if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // Move the entire try region if it can be moved
+ if (HBtab->ebdTryBeg->isRunRarely())
{
- // Move the entire try region if it can be moved
- if (HBtab->ebdTryBeg->isRunRarely())
+ BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY);
+ if (bTryLastBB != NULL)
{
- BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY);
- if (bTryLastBB != NULL)
- {
- result = true;
- movedTry = true;
- }
+ result = true;
+ movedTry = true;
}
+ }
#if DEBUG
- if (verbose && movedTry)
- {
- printf("\nAfter relocating an EH try region");
- fgDispBasicBlocks();
- fgDispHandlerTab();
+ if (verbose && movedTry)
+ {
+ printf("\nAfter relocating an EH try region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
- // Make sure that the predecessor lists are accurate
- if (expensiveDebugCheckLevel >= 2)
- {
- fgDebugCheckBBlist();
- }
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
}
-#endif // DEBUG
}
+#endif // DEBUG
+ }
- // Currently it is not good to move the rarely run handler regions to the end of the method
- // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
- CLANG_FORMAT_COMMENT_ANCHOR;
+ // Currently it is not good to move the rarely run handler regions to the end of the method
+ // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if 0
// Now try to move the entire handler region if it can be moved.
@@ -12142,38 +12141,38 @@ void Compiler::fgCreateFunclets()
#endif // 0
#if DEBUG
- if (verbose && movedHnd)
- {
- printf("\nAfter relocating an EH handler region");
- fgDispBasicBlocks();
- fgDispHandlerTab();
+ if (verbose && movedHnd)
+ {
+ printf("\nAfter relocating an EH handler region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
- // Make sure that the predecessor lists are accurate
- if (expensiveDebugCheckLevel >= 2)
- {
- fgDebugCheckBBlist();
- }
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
}
-#endif // DEBUG
}
+#endif // DEBUG
}
}
+ }
#if DEBUG
- fgVerifyHandlerTab();
+ fgVerifyHandlerTab();
- if (verbose && result)
- {
- printf("\nAfter fgRelocateEHRegions()");
- fgDispBasicBlocks();
- fgDispHandlerTab();
- // Make sure that the predecessor lists are accurate
- fgDebugCheckBBlist();
- }
+ if (verbose && result)
+ {
+ printf("\nAfter fgRelocateEHRegions()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
#endif // DEBUG
- return result;
- }
+ return result;
+}
#endif // !FEATURE_EH_FUNCLETS
@@ -13489,6 +13488,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
GenTree* switchVal = switchTree->gtOp.gtOp1;
noway_assert(genActualTypeIsIntOrI(switchVal->TypeGet()));
+#ifndef LEGACY_BACKEND
// If we are in LIR, remove the jump table from the block.
if (block->IsLIR())
{
@@ -13496,6 +13496,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
assert(jumpTable->OperGet() == GT_JMPTABLE);
blockRange->Remove(jumpTable);
}
+#endif
// Change the GT_SWITCH(switchVal) into GT_JTRUE(GT_EQ(switchVal==0)).
// Also mark the node as GTF_DONT_CSE as further down JIT is not capable of handling it.
@@ -13793,7 +13794,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi
{
LIR::Range& blockRange = LIR::AsRange(block);
GenTree* jmp = blockRange.LastNode();
- assert(jmp->OperGet() == GT_JTRUE);
+ assert(jmp->OperIsConditionalJump());
bool isClosed;
unsigned sideEffects;
@@ -14034,7 +14035,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
// we are willing to have more code expansion since we
// won't be running code from this page
//
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
if (rareJump)
{
@@ -14169,16 +14170,16 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
//
gtReverseCond(condTree);
+ // We need to update the following flags of the bJump block if they were set in the bDest block
+ bJump->bbFlags |=
+ (bDest->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF));
+
bJump->bbJumpKind = BBJ_COND;
bJump->bbJumpDest = bDest->bbNext;
/* Mark the jump dest block as being a jump target */
bJump->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
- // We need to update the following flags of the bJump block if they were set in the bbJumpDest block
- bJump->bbFlags |= (bJump->bbJumpDest->bbFlags &
- (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF));
-
/* Update bbRefs and bbPreds */
// bJump now falls through into the next block
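
The reordering in fgOptimizeBranch above matters because the summary flags must be read from the old destination (bDest) before bbJumpDest is retargeted to bDest->bbNext; the removed lines read them from bJump->bbJumpDest after it had already been updated, i.e. from the wrong block. A minimal reproduction of the ordering hazard, with Block and a single flag bit standing in for BasicBlock and the BBF_* set:

#include <cassert>
#include <cstdint>

struct Block
{
    uint64_t flags;
    Block*   next;
    Block*   jumpDest;
};

constexpr uint64_t HAS_NULLCHECK = 0x1; // stand-in for BBF_HAS_NULLCHECK and friends

static void retargetJump(Block* bJump, Block* bDest)
{
    // Correct order: harvest the summary flags from bDest first...
    bJump->flags |= (bDest->flags & HAS_NULLCHECK);
    // ...then retarget the branch past bDest.
    bJump->jumpDest = bDest->next;
}

int main()
{
    Block fallThrough{0, nullptr, nullptr};
    Block dest{HAS_NULLCHECK, &fallThrough, nullptr};
    Block jump{0, nullptr, &dest};

    retargetJump(&jump, &dest);
    assert((jump.flags & HAS_NULLCHECK) != 0); // propagated from the block we now skip
    assert(jump.jumpDest == &fallThrough);
    return 0;
}
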
@@ -15879,11 +15880,18 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
/* Reverse the jump condition */
GenTree* test = block->lastNode();
- noway_assert(test->gtOper == GT_JTRUE);
+ noway_assert(test->OperIsConditionalJump());
- GenTree* cond = gtReverseCond(test->gtOp.gtOp1);
- assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
- test->gtOp.gtOp1 = cond;
+ if (test->OperGet() == GT_JTRUE)
+ {
+ GenTree* cond = gtReverseCond(test->gtOp.gtOp1);
+ assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
+ test->gtOp.gtOp1 = cond;
+ }
+ else
+ {
+ gtReverseCond(test);
+ }
// Optimize the Conditional JUMP to go to the new target
block->bbJumpDest = bNext->bbJumpDest;
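
The fgUpdateFlowGraph hunk above now reverses either shape of conditional jump: for GT_JTRUE the relop operand is reversed in place, while for GT_JCC the condition lives on the jump node itself. A schematic of that dispatch with invented Node/Oper/Cond types, not the real GenTree API:

#include <cassert>

enum class Oper { JTrue, Jcc, Relop };
enum class Cond { EQ, NE };

struct Node
{
    Oper  oper;
    Cond  cond;       // condition code (meaningful for Jcc and Relop)
    Node* relopChild; // only JTrue carries its comparison as a child node
};

static Cond reverse(Cond c)
{
    return (c == Cond::EQ) ? Cond::NE : Cond::EQ;
}

static void reverseConditionalJump(Node& test)
{
    if (test.oper == Oper::JTrue)
    {
        // JTRUE: the condition is the relop operand; reverse it in place.
        test.relopChild->cond = reverse(test.relopChild->cond);
    }
    else
    {
        // JCC: the condition code sits on the jump node itself.
        test.cond = reverse(test.cond);
    }
}

int main()
{
    Node relop{Oper::Relop, Cond::EQ, nullptr};
    Node jtrue{Oper::JTrue, Cond::EQ, &relop};
    Node jcc{Oper::Jcc, Cond::NE, nullptr};

    reverseConditionalJump(jtrue);
    reverseConditionalJump(jcc);
    assert(relop.cond == Cond::NE);
    assert(jcc.cond == Cond::EQ);
    return 0;
}
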
@@ -18020,9 +18028,13 @@ void Compiler::fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR)
{
// If we are sequencing a node that does not appear in LIR,
// do not add it to the list.
- if (isLIR && (((tree->OperGet() == GT_LIST) && !tree->AsArgList()->IsAggregate()) || tree->OperGet() == GT_ARGPLACE))
+ if (isLIR)
{
- return;
+ if ((tree->OperGet() == GT_LIST) || (tree->OperGet() == GT_ARGPLACE) ||
+ (tree->OperGet() == GT_FIELD_LIST && !tree->AsFieldList()->IsFieldListHead()))
+ {
+ return;
+ }
}
/* Append to the node list */
@@ -18359,7 +18371,7 @@ void Compiler::fgSetBlockOrder(BasicBlock* block)
//
// For the (usual) case of GT_BLK or GT_OBJ, the size is always "evaluated" (i.e.
// instantiated into a register) last. In those cases, the GTF_REVERSE_OPS flag
-// on the assignment works as usual.
+// on the assignment works as usual.
// In order to preserve previous possible orderings, the order for evaluating
// the size of a GT_DYN_BLK node is controlled by its gtEvalSizeFirst flag. If
// that is set, the size is evaluated first, and then the src and dst are evaluated
@@ -18549,20 +18561,20 @@ static escapeMapping_t s_EscapeMapping[] =
{'"', "&quot;"},
{0, nullptr}
};
-// clang-formt on
+// clang-format on
-const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
+const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
{
- const char* nameOut = nameIn;
- unsigned lengthOut;
- unsigned index;
- bool match;
- bool subsitutionRequired;
- const char* pChar;
-
- lengthOut = 1;
+ const char* nameOut = nameIn;
+ unsigned lengthOut;
+ unsigned index;
+ bool match;
+ bool subsitutionRequired;
+ const char* pChar;
+
+ lengthOut = 1;
subsitutionRequired = false;
- pChar = nameIn;
+ pChar = nameIn;
while (*pChar != '\0')
{
match = false;
@@ -18590,8 +18602,8 @@ const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* ma
if (subsitutionRequired)
{
- char* newName = (char*) compGetMemA(lengthOut, CMK_DebugOnly);
- char* pDest;
+ char* newName = (char*)compGetMemA(lengthOut, CMK_DebugOnly);
+ char* pDest;
pDest = newName;
pChar = nameIn;
while (*pChar != '\0')
@@ -18619,7 +18631,7 @@ const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* ma
pChar++;
}
*pDest++ = '\0';
- nameOut = (const char*) newName;
+ nameOut = (const char*)newName;
}
return nameOut;
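
fgProcessEscapes above is only re-indented by the change, but the algorithm it implements (scan the name, substituting any character found in the escape table) is easy to see in a standalone form. This sketch swaps the JIT's measure-then-allocate pass for std::string, so it is a simplification rather than the real routine:

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// One entry per escapeMapping_t pair: a character and its replacement string.
static const std::vector<std::pair<char, const char*>> kEscapes = {
    {'<', "&lt;"}, {'>', "&gt;"}, {'&', "&amp;"}, {'"', "&quot;"}};

static std::string processEscapes(const std::string& in)
{
    std::string out;
    out.reserve(in.size()); // the real code measures the exact output length first
    for (char c : in)
    {
        bool matched = false;
        for (const auto& e : kEscapes)
        {
            if (c == e.first)
            {
                out += e.second;
                matched = true;
                break;
            }
        }
        if (!matched)
        {
            out += c;
        }
    }
    return out;
}

int main()
{
    std::cout << processEscapes("List<int> & \"friends\"") << "\n";
    // prints: List&lt;int&gt; &amp; &quot;friends&quot;
    return 0;
}
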
@@ -18655,44 +18667,47 @@ static void fprintfDouble(FILE* fgxFile, double value)
// Opens a file to which a flowgraph can be dumped, whose name is based on the current
// config vales.
-FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type)
+FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type)
{
- FILE* fgxFile;
- LPCWSTR pattern = nullptr;
- LPCWSTR filename = nullptr;
- LPCWSTR pathname = nullptr;
- const char* escapedString;
- bool createDuplicateFgxFiles = true;
+ FILE* fgxFile;
+ LPCWSTR pattern = nullptr;
+ LPCWSTR filename = nullptr;
+ LPCWSTR pathname = nullptr;
+ const char* escapedString;
+ bool createDuplicateFgxFiles = true;
#ifdef DEBUG
- if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
- pattern = JitConfig.NgenDumpFg();
+ pattern = JitConfig.NgenDumpFg();
filename = JitConfig.NgenDumpFgFile();
pathname = JitConfig.NgenDumpFgDir();
}
else
{
- pattern = JitConfig.JitDumpFg();
+ pattern = JitConfig.JitDumpFg();
filename = JitConfig.JitDumpFgFile();
pathname = JitConfig.JitDumpFgDir();
}
#endif // DEBUG
- if (fgBBcount <= 1) {
+ if (fgBBcount <= 1)
+ {
return nullptr;
-}
+ }
- if (pattern == nullptr) {
+ if (pattern == nullptr)
+ {
return nullptr;
-}
+ }
- if (wcslen(pattern) == 0) {
+ if (wcslen(pattern) == 0)
+ {
return nullptr;
-}
+ }
LPCWSTR phasePattern = JitConfig.JitDumpFgPhase();
- LPCWSTR phaseName = PhaseShortNames[phase];
+ LPCWSTR phaseName = PhaseShortNames[phase];
if (phasePattern == nullptr)
{
if (phase != PHASE_DETERMINE_FIRST_COLD_BLOCK)
@@ -18723,9 +18738,10 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas
{
while ((*pattern != W(':')) && (*pattern != W('*')))
{
- if (*pattern != *className) {
+ if (*pattern != *className)
+ {
return nullptr;
-}
+ }
pattern++;
className++;
@@ -18736,12 +18752,14 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas
}
else
{
- if (*className != 0) {
+ if (*className != 0)
+ {
return nullptr;
-}
- }
+ }
}
- if (*pattern != W(':')) {
+ }
+ if (*pattern != W(':'))
+ {
return nullptr;
}
@@ -18757,9 +18775,10 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas
{
while ((*pattern != 0) && (*pattern != W('*')))
{
- if (*pattern != *methodName) {
+ if (*pattern != *methodName)
+ {
return nullptr;
-}
+ }
pattern++;
methodName++;
@@ -18770,12 +18789,14 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas
}
else
{
- if (*methodName != 0) {
+ if (*methodName != 0)
+ {
return nullptr;
-}
- }
+ }
}
- if (*pattern != 0) {
+ }
+ if (*pattern != 0)
+ {
return nullptr;
}
}
@@ -18838,15 +18859,15 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas
{
createDuplicateFgxFiles = true;
-ONE_FILE_PER_METHOD:;
+ ONE_FILE_PER_METHOD:;
- escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping);
+ escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping);
size_t wCharCount = strlen(escapedString) + wcslen(phaseName) + 1 + strlen("~999") + wcslen(type) + 1;
if (pathname != nullptr)
{
wCharCount += wcslen(pathname) + 1;
}
- filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ filename = (LPCWSTR)alloca(wCharCount * sizeof(WCHAR));
if (pathname != nullptr)
{
swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S-%s.%s"), pathname, escapedString, phaseName, type);
@@ -18855,7 +18876,7 @@ ONE_FILE_PER_METHOD:;
{
swprintf_s((LPWSTR)filename, wCharCount, W("%S.%s"), escapedString, type);
}
- fgxFile = _wfopen(filename, W("r")); // Check if this file already exists
+ fgxFile = _wfopen(filename, W("r")); // Check if this file already exists
if (fgxFile != nullptr)
{
// For Generic methods we will have both hot and cold versions
@@ -18876,10 +18897,11 @@ ONE_FILE_PER_METHOD:;
{
swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.%s"), escapedString, i, type);
}
- fgxFile = _wfopen(filename, W("r")); // Check if this file exists
- if (fgxFile == nullptr) {
+ fgxFile = _wfopen(filename, W("r")); // Check if this file exists
+ if (fgxFile == nullptr)
+ {
break;
- }
+ }
}
// If we have already created 1000 files with this name then just fail
if (fgxFile != nullptr)
@@ -18888,28 +18910,28 @@ ONE_FILE_PER_METHOD:;
return nullptr;
}
}
- fgxFile = _wfopen(filename, W("a+"));
+ fgxFile = _wfopen(filename, W("a+"));
*wbDontClose = false;
}
else if (wcscmp(filename, W("stdout")) == 0)
{
- fgxFile = jitstdout;
+ fgxFile = jitstdout;
*wbDontClose = true;
}
else if (wcscmp(filename, W("stderr")) == 0)
{
- fgxFile = stderr;
+ fgxFile = stderr;
*wbDontClose = true;
}
else
{
LPCWSTR origFilename = filename;
- size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2;
+ size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2;
if (pathname != nullptr)
{
wCharCount += wcslen(pathname) + 1;
}
- filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ filename = (LPCWSTR)alloca(wCharCount * sizeof(WCHAR));
if (pathname != nullptr)
{
swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.%s"), pathname, origFilename, type);
@@ -18918,7 +18940,7 @@ ONE_FILE_PER_METHOD:;
{
swprintf_s((LPWSTR)filename, wCharCount, W("%s.%s"), origFilename, type);
}
- fgxFile = _wfopen(filename, W("a+"));
+ fgxFile = _wfopen(filename, W("a+"));
*wbDontClose = false;
}
@@ -18959,39 +18981,39 @@ ONE_FILE_PER_METHOD:;
// phases.
// COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.)
-bool Compiler::fgDumpFlowGraph(Phases phase)
+bool Compiler::fgDumpFlowGraph(Phases phase)
{
- bool result = false;
- bool dontClose = false;
- bool createDotFile = false;
+ bool result = false;
+ bool dontClose = false;
+ bool createDotFile = false;
if (JitConfig.JitDumpFgDot())
{
createDotFile = true;
}
-
- FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx"));
+
+ FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx"));
if (fgxFile == nullptr)
{
return false;
}
- bool validWeights = fgHaveValidEdgeWeights;
- unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
- double weightDivisor = (double) (calledCount * BB_UNITY_WEIGHT);
- const char* escapedString;
- const char* regionString = "NONE";
+ bool validWeights = fgHaveValidEdgeWeights;
+ unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
+ double weightDivisor = (double)(calledCount * BB_UNITY_WEIGHT);
+ const char* escapedString;
+ const char* regionString = "NONE";
- if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
{
- regionString="HOT";
+ regionString = "HOT";
}
else if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD)
{
- regionString="COLD";
+ regionString = "COLD";
}
else if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT)
{
- regionString="JIT";
+ regionString = "JIT";
}
if (createDotFile)
@@ -19001,7 +19023,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
}
else
{
- fprintf(fgxFile, "<method");
+ fprintf(fgxFile, "<method");
escapedString = fgProcessEscapes(info.compFullName, s_EscapeMapping);
fprintf(fgxFile, "\n name=\"%s\"", escapedString);
@@ -19042,77 +19064,74 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum);
}
- fprintf(fgxFile, ">");
+ fprintf(fgxFile, ">");
fprintf(fgxFile, "\n <blocks");
fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount);
- fprintf(fgxFile, ">");
+ fprintf(fgxFile, ">");
}
- static const char* kindImage[] = { "EHFINALLYRET", "EHFILTERRET", "EHCATCHRET",
- "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE",
- "CALLFINALLY", "COND", "SWITCH" };
+ static const char* kindImage[] = {"EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", "THROW", "RETURN", "NONE",
+ "ALWAYS", "LEAVE", "CALLFINALLY", "COND", "SWITCH"};
BasicBlock* block;
unsigned blockOrdinal;
- for (block = fgFirstBB , blockOrdinal = 1;
- block != nullptr;
- block = block->bbNext, blockOrdinal++)
+ for (block = fgFirstBB, blockOrdinal = 1; block != nullptr; block = block->bbNext, blockOrdinal++)
{
if (createDotFile)
{
// Add constraint edges to try to keep nodes ordered.
// It seems to work best if these edges are all created first.
- switch(block->bbJumpKind)
+ switch (block->bbJumpKind)
{
- case BBJ_COND:
- case BBJ_NONE:
- assert(block->bbNext != nullptr);
- fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum);
- break;
- default:
- // These may or may not have an edge to the next block.
- // Add a transparent edge to keep nodes ordered.
- if (block->bbNext != nullptr)
- {
- fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, block->bbNext->bbNum);
- }
+ case BBJ_COND:
+ case BBJ_NONE:
+ assert(block->bbNext != nullptr);
+ fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum);
+ break;
+ default:
+ // These may or may not have an edge to the next block.
+ // Add a transparent edge to keep nodes ordered.
+ if (block->bbNext != nullptr)
+ {
+ fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum,
+ block->bbNext->bbNum);
+ }
}
}
else
{
- fprintf(fgxFile,"\n <block");
- fprintf(fgxFile,"\n id=\"%d\"", block->bbNum);
- fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal);
- fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]);
+ fprintf(fgxFile, "\n <block");
+ fprintf(fgxFile, "\n id=\"%d\"", block->bbNum);
+ fprintf(fgxFile, "\n ordinal=\"%d\"", blockOrdinal);
+ fprintf(fgxFile, "\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]);
if (block->hasTryIndex())
{
- fprintf(fgxFile,"\n inTry=\"%s\"", "true");
+ fprintf(fgxFile, "\n inTry=\"%s\"", "true");
}
if (block->hasHndIndex())
{
- fprintf(fgxFile,"\n inHandler=\"%s\"", "true");
+ fprintf(fgxFile, "\n inHandler=\"%s\"", "true");
}
- if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) &&
- ((block->bbFlags & BBF_COLD) == 0) )
+ if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && ((block->bbFlags & BBF_COLD) == 0))
{
- fprintf(fgxFile,"\n hot=\"true\"");
+ fprintf(fgxFile, "\n hot=\"true\"");
}
if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY))
{
- fprintf(fgxFile,"\n callsNew=\"true\"");
+ fprintf(fgxFile, "\n callsNew=\"true\"");
}
if (block->bbFlags & BBF_LOOP_HEAD)
{
- fprintf(fgxFile,"\n loopHead=\"true\"");
+ fprintf(fgxFile, "\n loopHead=\"true\"");
}
- fprintf(fgxFile,"\n weight=");
- fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor);
- fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block));
- fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs);
- fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd);
- fprintf(fgxFile, ">");
- fprintf(fgxFile,"\n </block>");
+ fprintf(fgxFile, "\n weight=");
+ fprintfDouble(fgxFile, ((double)block->bbWeight) / weightDivisor);
+ fprintf(fgxFile, "\n codeEstimate=\"%d\"", fgGetCodeEstimate(block));
+ fprintf(fgxFile, "\n startOffset=\"%d\"", block->bbCodeOffs);
+ fprintf(fgxFile, "\n endOffset=\"%d\"", block->bbCodeOffsEnd);
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile, "\n </block>");
}
}
@@ -19122,10 +19141,10 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
fprintf(fgxFile, "\n <edges");
fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount);
- fprintf(fgxFile, ">");
+ fprintf(fgxFile, ">");
}
- unsigned edgeNum = 1;
+ unsigned edgeNum = 1;
BasicBlock* bTarget;
for (bTarget = fgFirstBB; bTarget != nullptr; bTarget = bTarget->bbNext)
{
@@ -19136,21 +19155,21 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
}
else
{
- targetWeightDivisor = (double) bTarget->bbWeight;
+ targetWeightDivisor = (double)bTarget->bbWeight;
}
flowList* edge;
for (edge = bTarget->bbPreds; edge != nullptr; edge = edge->flNext, edgeNum++)
{
- BasicBlock* bSource = edge->flBlock;
- double sourceWeightDivisor;
+ BasicBlock* bSource = edge->flBlock;
+ double sourceWeightDivisor;
if (bSource->bbWeight == BB_ZERO_WEIGHT)
{
sourceWeightDivisor = 1.0;
}
else
{
- sourceWeightDivisor = (double) bSource->bbWeight;
+ sourceWeightDivisor = (double)bSource->bbWeight;
}
if (createDotFile)
{
@@ -19172,54 +19191,54 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
}
else
{
- fprintf(fgxFile,"\n <edge");
- fprintf(fgxFile,"\n id=\"%d\"", edgeNum);
- fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum);
- fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum);
+ fprintf(fgxFile, "\n <edge");
+ fprintf(fgxFile, "\n id=\"%d\"", edgeNum);
+ fprintf(fgxFile, "\n source=\"%d\"", bSource->bbNum);
+ fprintf(fgxFile, "\n target=\"%d\"", bTarget->bbNum);
if (bSource->bbJumpKind == BBJ_SWITCH)
{
if (edge->flDupCount >= 2)
{
- fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount);
+ fprintf(fgxFile, "\n switchCases=\"%d\"", edge->flDupCount);
}
if (bSource->bbJumpSwt->getDefault() == bTarget)
{
- fprintf(fgxFile,"\n switchDefault=\"true\"");
+ fprintf(fgxFile, "\n switchDefault=\"true\"");
}
}
if (validWeights)
{
unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2;
- fprintf(fgxFile,"\n weight=");
- fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor);
+ fprintf(fgxFile, "\n weight=");
+ fprintfDouble(fgxFile, ((double)edgeWeight) / weightDivisor);
if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
{
- fprintf(fgxFile,"\n minWeight=");
- fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor);
- fprintf(fgxFile,"\n maxWeight=");
- fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor);
+ fprintf(fgxFile, "\n minWeight=");
+ fprintfDouble(fgxFile, ((double)edge->flEdgeWeightMin) / weightDivisor);
+ fprintf(fgxFile, "\n maxWeight=");
+ fprintfDouble(fgxFile, ((double)edge->flEdgeWeightMax) / weightDivisor);
}
if (edgeWeight > 0)
{
if (edgeWeight < bSource->bbWeight)
{
- fprintf(fgxFile,"\n out=");
- fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor );
+ fprintf(fgxFile, "\n out=");
+ fprintfDouble(fgxFile, ((double)edgeWeight) / sourceWeightDivisor);
}
if (edgeWeight < bTarget->bbWeight)
{
- fprintf(fgxFile,"\n in=");
- fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor);
+ fprintf(fgxFile, "\n in=");
+ fprintfDouble(fgxFile, ((double)edgeWeight) / targetWeightDivisor);
}
}
}
}
if (!createDotFile)
{
- fprintf(fgxFile, ">");
- fprintf(fgxFile,"\n </edge>");
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile, "\n </edge>");
}
}
}
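
The dot-format branch of fgDumpFlowGraph, reformatted above, emits one "BBxx -> BByy" line per flow edge plus transparent constraint edges so Graphviz keeps blocks in program order. A stripped-down writer in the same spirit; DotBlock and the edge rules are simplified stand-ins, not the dump's exact logic.

#include <cstdio>

struct DotBlock
{
    unsigned num;      // BBxx number
    int      branchTo; // explicit branch target, or -1 for fall-through only
};

int main()
{
    const DotBlock blocks[] = {{1, 3}, {2, -1}, {3, -1}};
    const unsigned count    = sizeof(blocks) / sizeof(blocks[0]);

    std::FILE* f = std::fopen("flow.dot", "w");
    if (f == nullptr)
    {
        return 1;
    }
    std::fprintf(f, "digraph flow {\n");
    for (unsigned i = 0; i < count; i++)
    {
        const DotBlock& b = blocks[i];
        if (i + 1 < count)
        {
            // Constraint edge to the lexical successor keeps the layout in block order.
            std::fprintf(f, "    BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", b.num, blocks[i + 1].num);
        }
        if (b.branchTo != -1)
        {
            std::fprintf(f, "    BB%02u -> BB%02u\n", b.num, (unsigned)b.branchTo);
        }
    }
    std::fprintf(f, "}\n");
    std::fclose(f);
    return 0;
}
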
@@ -19251,7 +19270,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
/*****************************************************************************/
#ifdef DEBUG
-void Compiler::fgDispReach()
+void Compiler::fgDispReach()
{
printf("------------------------------------------------\n");
printf("BBnum Reachable by \n");
@@ -19269,7 +19288,7 @@ void Compiler::fgDispReach()
}
}
-void Compiler::fgDispDoms()
+void Compiler::fgDispDoms()
{
// Don't bother printing this when we have a large number of BasicBlocks in the method
if (fgBBcount > 256)
@@ -19296,23 +19315,17 @@ void Compiler::fgDispDoms()
/*****************************************************************************/
-void Compiler::fgTableDispBasicBlock(BasicBlock* block,
- int ibcColWidth /* = 0 */)
+void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 */)
{
- unsigned flags = block->bbFlags;
+ const unsigned __int64 flags = block->bbFlags;
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
+ int blockNumWidth = CountDigits(block->bbNum);
+ blockNumWidth = max(blockNumWidth, 2);
+ int blockNumPadding = maxBlockNumWidth - blockNumWidth;
- unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
- int maxBlockNumWidth = CountDigits(bbNumMax);
- maxBlockNumWidth = max(maxBlockNumWidth, 2);
- int blockNumWidth = CountDigits(block->bbNum);
- blockNumWidth = max(blockNumWidth, 2);
- int blockNumPadding = maxBlockNumWidth - blockNumWidth;
-
- printf("BB%02u%*s [%08p] %2u",
- block->bbNum,
- blockNumPadding, "",
- dspPtr(block),
- block->bbRefs);
+ printf("BB%02u%*s [%08p] %2u", block->bbNum, blockNumPadding, "", dspPtr(block), block->bbRefs);
//
// Display EH 'try' region index
@@ -19406,86 +19419,89 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block,
// Display block branch target
//
- if (flags & BBF_REMOVED)
+ if (flags & BBF_REMOVED)
{
- printf( "[removed] ");
+ printf("[removed] ");
}
else
{
switch (block->bbJumpKind)
{
- case BBJ_COND:
- printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- break;
+ case BBJ_COND:
+ printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
- case BBJ_CALLFINALLY:
- printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- break;
+ case BBJ_CALLFINALLY:
+ printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
- case BBJ_ALWAYS:
- if (flags & BBF_KEEP_BBJ_ALWAYS)
- {
- printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- }
- else
- {
- printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- }
- break;
+ case BBJ_ALWAYS:
+ if (flags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ else
+ {
+ printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ break;
- case BBJ_LEAVE:
- printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- break;
+ case BBJ_LEAVE:
+ printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
- case BBJ_EHFINALLYRET:
- printf( "%*s (finret)", maxBlockNumWidth - 2, "");
- break;
+ case BBJ_EHFINALLYRET:
+ printf("%*s (finret)", maxBlockNumWidth - 2, "");
+ break;
- case BBJ_EHFILTERRET:
- printf( "%*s (fltret)", maxBlockNumWidth - 2, "");
- break;
+ case BBJ_EHFILTERRET:
+ printf("%*s (fltret)", maxBlockNumWidth - 2, "");
+ break;
- case BBJ_EHCATCHRET:
- printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
- break;
+ case BBJ_EHCATCHRET:
+ printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum,
+ maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
- case BBJ_THROW:
- printf( "%*s (throw )", maxBlockNumWidth - 2, "");
- break;
+ case BBJ_THROW:
+ printf("%*s (throw )", maxBlockNumWidth - 2, "");
+ break;
- case BBJ_RETURN:
- printf( "%*s (return)", maxBlockNumWidth - 2, "");
- break;
+ case BBJ_RETURN:
+ printf("%*s (return)", maxBlockNumWidth - 2, "");
+ break;
- default:
- printf( "%*s ", maxBlockNumWidth - 2, "");
- break;
+ default:
+ printf("%*s ", maxBlockNumWidth - 2, "");
+ break;
- case BBJ_SWITCH:
- printf("->");
-
- unsigned jumpCnt;
- jumpCnt = block->bbJumpSwt->bbsCount;
- BasicBlock** jumpTab;
- jumpTab = block->bbJumpSwt->bbsDstTab;
- int switchWidth;
- switchWidth = 0;
- do
- {
- printf("%cBB%02u",
- (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',',
- (*jumpTab)->bbNum);
- switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2);
- }
- while (++jumpTab, --jumpCnt);
+ case BBJ_SWITCH:
+ printf("->");
- if (switchWidth < 7)
- {
- printf("%*s", 8 - switchWidth, "");
- }
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ int switchWidth;
+ switchWidth = 0;
+ do
+ {
+ printf("%cBB%02u", (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
+ switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2);
+ } while (++jumpTab, --jumpCnt);
- printf(" (switch)");
- break;
+ if (switchWidth < 7)
+ {
+ printf("%*s", 8 - switchWidth, "");
+ }
+
+ printf(" (switch)");
+ break;
}
}
@@ -19526,12 +19542,28 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block,
switch (block->bbCatchTyp)
{
- case BBCT_NONE: break;
- case BBCT_FAULT: printf("fault "); cnt += 6; break;
- case BBCT_FINALLY: printf("finally "); cnt += 8; break;
- case BBCT_FILTER: printf("filter "); cnt += 7; break;
- case BBCT_FILTER_HANDLER: printf("filtHnd "); cnt += 8; break;
- default: printf("catch "); cnt += 6; break;
+ case BBCT_NONE:
+ break;
+ case BBCT_FAULT:
+ printf("fault ");
+ cnt += 6;
+ break;
+ case BBCT_FINALLY:
+ printf("finally ");
+ cnt += 8;
+ break;
+ case BBCT_FILTER:
+ printf("filter ");
+ cnt += 7;
+ break;
+ case BBCT_FILTER_HANDLER:
+ printf("filtHnd ");
+ cnt += 8;
+ break;
+ default:
+ printf("catch ");
+ cnt += 6;
+ break;
}
if (block->bbCatchTyp != BBCT_NONE)
@@ -19548,9 +19580,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block,
EHblkDsc* HBtab;
EHblkDsc* HBtabEnd;
- for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
- HBtab < HBtabEnd;
- HBtab++)
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
{
if (HBtab->ebdTryBeg == block)
{
@@ -19564,9 +19594,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block,
EHblkDsc* HBtab;
EHblkDsc* HBtabEnd;
- for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
- HBtab < HBtabEnd;
- HBtab++)
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
{
if (HBtab->ebdTryLast == block)
{
@@ -19607,9 +19635,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block,
Dump blocks from firstBlock to lastBlock.
*/
-void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
- BasicBlock* lastBlock,
- bool dumpTrees)
+void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, bool dumpTrees)
{
BasicBlock* block;
@@ -19627,24 +19653,27 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
if (block->bbFlags & BBF_PROF_WEIGHT)
{
int thisIbcWidth = CountDigits(block->bbWeight);
- ibcColWidth = max(ibcColWidth, thisIbcWidth);
+ ibcColWidth = max(ibcColWidth, thisIbcWidth);
}
- if (block == lastBlock) {
+ if (block == lastBlock)
+ {
break;
- }
+ }
}
if (ibcColWidth > 0)
{
ibcColWidth = max(ibcColWidth, 3) + 1; // + 1 for the leading space
}
- unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
- int maxBlockNumWidth = CountDigits(bbNumMax);
- maxBlockNumWidth = max(maxBlockNumWidth, 2);
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
padWidth += maxBlockNumWidth - 2; // Account for functions with a large number of blocks.
+ // clang-format off
+
printf("\n");
printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
padWidth, "------------",
@@ -19665,9 +19694,9 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
ibcColWidth, "------------",
maxBlockNumWidth, "----");
- for (block = firstBlock;
- block;
- block = block->bbNext)
+ // clang-format on
+
+ for (block = firstBlock; block; block = block->bbNext)
{
// First, do some checking on the bbPrev links
if (block->bbPrev)
@@ -19681,36 +19710,34 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
{
printf("bad prev link!\n");
}
-
+
if (block == fgFirstColdBlock)
{
- printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
- padWidth, "~~~~~~~~~~~~",
- ibcColWidth, "~~~~~~~~~~~~",
- maxBlockNumWidth, "~~~~");
+ printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~"
+ "~~~~~~~~~~~~~~~\n",
+ padWidth, "~~~~~~~~~~~~", ibcColWidth, "~~~~~~~~~~~~", maxBlockNumWidth, "~~~~");
}
#if FEATURE_EH_FUNCLETS
if (block == fgFirstFuncletBB)
{
- printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++++++++++++++++++ funclets follow\n",
- padWidth, "++++++++++++",
- ibcColWidth, "++++++++++++",
- maxBlockNumWidth, "++++");
+ printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s+++++++++++++++++++++++++"
+ "+++++++++++++++ funclets follow\n",
+ padWidth, "++++++++++++", ibcColWidth, "++++++++++++", maxBlockNumWidth, "++++");
}
#endif // FEATURE_EH_FUNCLETS
fgTableDispBasicBlock(block, ibcColWidth);
- if (block == lastBlock) {
+ if (block == lastBlock)
+ {
break;
- }
+ }
}
- printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
- padWidth, "------------",
- ibcColWidth, "------------",
- maxBlockNumWidth, "----");
+ printf("------%*s------------------------------------%*s-----------------------%*s---------------------------------"
+ "-------\n",
+ padWidth, "------------", ibcColWidth, "------------", maxBlockNumWidth, "----");
if (dumpTrees)
{
@@ -19720,7 +19747,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
/*****************************************************************************/
-void Compiler::fgDispBasicBlocks(bool dumpTrees)
+void Compiler::fgDispBasicBlocks(bool dumpTrees)
{
fgDispBasicBlocks(fgFirstBB, nullptr, dumpTrees);
}
@@ -19728,9 +19755,9 @@ void Compiler::fgDispBasicBlocks(bool dumpTrees)
/*****************************************************************************/
// Increment the stmtNum and dump the tree using gtDispTree
//
-void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum)
+void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum)
{
- compCurStmtNum++; // Increment the current stmtNum
+ compCurStmtNum++; // Increment the current stmtNum
printf("\n***** BB%02u, stmt %d\n", blkNum, compCurStmtNum);
@@ -19750,7 +19777,7 @@ void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum)
// Arguments:
// block - The block to dump.
//
-void Compiler::fgDumpBlock(BasicBlock* block)
+void Compiler::fgDumpBlock(BasicBlock* block)
{
printf("\n------------ ");
block->dspBlockHeader(this);
@@ -19762,7 +19789,7 @@ void Compiler::fgDumpBlock(BasicBlock* block)
fgDumpStmtTree(stmt, block->bbNum);
if (stmt == block->bbTreeList)
{
- block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
}
}
}
@@ -19775,63 +19802,81 @@ void Compiler::fgDumpBlock(BasicBlock* block)
/*****************************************************************************/
// Walk the BasicBlock list calling fgDumpTree once per Stmt
//
-void Compiler::fgDumpTrees(BasicBlock* firstBlock,
- BasicBlock* lastBlock)
+void Compiler::fgDumpTrees(BasicBlock* firstBlock, BasicBlock* lastBlock)
{
- compCurStmtNum = 0; // Reset the current stmtNum
+ compCurStmtNum = 0; // Reset the current stmtNum
/* Walk the basic blocks */
- // Note that typically we have already called fgDispBasicBlocks()
+ // Note that typically we have already called fgDispBasicBlocks()
// so we don't need to print the preds and succs again here
//
for (BasicBlock* block = firstBlock; block; block = block->bbNext)
{
fgDumpBlock(block);
- if (block == lastBlock) {
+ if (block == lastBlock)
+ {
break;
+ }
}
- }
- printf("\n-------------------------------------------------------------------------------------------------------------------\n");
+ printf("\n---------------------------------------------------------------------------------------------------------"
+ "----------\n");
}
-
/*****************************************************************************
* Try to create as many candidates for GTF_MUL_64RSLT as possible.
* We convert 'intOp1*intOp2' into 'int(long(nop(intOp1))*long(intOp2))'.
*/
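As a side note on why this stress transform preserves values, the sketch below (plain standalone C++, not part of this diff) shows that multiplying at 64-bit width and truncating back to 32 bits gives the same result as the original 32-bit multiply for non-overflowing operands:

    // Illustration only: int(long(op1) * long(op2)) matches op1 * op2 for 32-bit operands.
    #include <cassert>
    #include <cstdint>

    int main()
    {
        int32_t intOp1 = 123456;
        int32_t intOp2 = 789;
        int64_t wide   = (int64_t)intOp1 * (int64_t)intOp2; // multiply performed at 64-bit width
        assert((int32_t)wide == intOp1 * intOp2);           // truncation recovers the 32-bit product
        return 0;
    }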
/* static */
-Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data)
{
- GenTreePtr tree = *pTree;
+ GenTreePtr tree = *pTree;
Compiler* pComp = data->compiler;
-
- if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) {
+
+ if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow()))
+ {
return WALK_CONTINUE;
-}
+ }
+
+#ifdef DEBUG
+ if (pComp->verbose)
+ {
+ printf("STRESS_64RSLT_MUL before:\n");
+ pComp->gtDispTree(tree);
+ }
+#endif // DEBUG
// To ensure optNarrowTree() doesn't fold back to the original tree.
- tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1);
tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG);
- tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG);
- tree->gtType = TYP_LONG;
- *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT);
+ tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1);
+ tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG);
+ tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG);
+ tree->gtType = TYP_LONG;
+ *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT);
+
+#ifdef DEBUG
+ if (pComp->verbose)
+ {
+ printf("STRESS_64RSLT_MUL after:\n");
+ pComp->gtDispTree(*pTree);
+ }
+#endif // DEBUG
return WALK_SKIP_SUBTREES;
}
-void Compiler::fgStress64RsltMul()
+void Compiler::fgStress64RsltMul()
{
- if (!compStressCompile(STRESS_64RSLT_MUL, 20)) {
+ if (!compStressCompile(STRESS_64RSLT_MUL, 20))
+ {
return;
-}
+ }
fgWalkAllTreesPre(fgStress64RsltMulCB, (void*)this);
}
-
// This variable is used to generate "traversal labels": one-time constants with which
// we label basic blocks that are members of the basic block list, in order to have a
// fast, high-probability test for membership in that list. Type is "volatile" because
@@ -19847,8 +19892,7 @@ static volatile int bbTraverseLabel = 1;
*
*****************************************************************************/
-void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
- bool checkBBRefs /* = true */)
+void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRefs /* = true */)
{
#ifdef DEBUG
if (verbose)
@@ -19858,7 +19902,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
#endif // DEBUG
fgDebugCheckBlockLinks();
-
+
if (fgBBcount > 10000 && expensiveDebugCheckLevel < 1)
{
// The basic block checks are too expensive if there are too many blocks,
@@ -19875,7 +19919,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
unsigned blockRefs;
#if FEATURE_EH_FUNCLETS
- bool reachedFirstFunclet = false;
+ bool reachedFirstFunclet = false;
if (fgFuncletsCreated)
{
//
@@ -19898,15 +19942,13 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
block->bbTraversalStamp = curTraversalStamp;
}
- for (prevBlock = nullptr, block = fgFirstBB;
- block;
- prevBlock = block, block = block->bbNext)
+ for (prevBlock = nullptr, block = fgFirstBB; block; prevBlock = block, block = block->bbNext)
{
blockRefs = 0;
/* First basic block has countOfInEdges() >= 1 */
- if (block == fgFirstBB)
+ if (block == fgFirstBB)
{
noway_assert(block->countOfInEdges() >= 1);
blockRefs = 1;
@@ -19920,27 +19962,24 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
// If the block is a BBJ_COND, a BBJ_SWITCH or a
// lowered GT_SWITCH_TABLE node then make sure it
- // ends with a GT_JTRUE or a GT_SWITCH
+ // ends with a conditional jump or a GT_SWITCH
if (block->bbJumpKind == BBJ_COND)
{
- noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->gtOper == GT_JTRUE);
+ noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->OperIsConditionalJump());
}
else if (block->bbJumpKind == BBJ_SWITCH)
{
#ifndef LEGACY_BACKEND
noway_assert(block->lastNode()->gtNext == nullptr &&
- (block->lastNode()->gtOper == GT_SWITCH ||
- block->lastNode()->gtOper == GT_SWITCH_TABLE));
-#else // LEGACY_BACKEND
- noway_assert(block->lastStmt()->gtNext == NULL &&
- block->lastStmt()->gtStmtExpr->gtOper == GT_SWITCH);
+ (block->lastNode()->gtOper == GT_SWITCH || block->lastNode()->gtOper == GT_SWITCH_TABLE));
+#else // LEGACY_BACKEND
+ noway_assert(block->lastStmt()->gtNext == NULL && block->lastStmt()->gtStmtExpr->gtOper == GT_SWITCH);
#endif // LEGACY_BACKEND
}
- else if (!( block->bbJumpKind == BBJ_ALWAYS
- || block->bbJumpKind == BBJ_RETURN))
+ else if (!(block->bbJumpKind == BBJ_ALWAYS || block->bbJumpKind == BBJ_RETURN))
{
- //this block cannot have a poll
+ // this block cannot have a poll
noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
}
@@ -19981,7 +20020,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
#endif // FEATURE_EH_FUNCLETS
// Don't check cheap preds.
- for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr; blockRefs += pred->flDupCount, pred = pred->flNext)
+ for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr;
+ blockRefs += pred->flDupCount, pred = pred->flNext)
{
assert(fgComputePredsDone); // If this isn't set, why do we have a preds list?
@@ -19994,95 +20034,101 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
if (ehTryDsc != nullptr)
{
// You can jump to the start of a try
- if (ehTryDsc->ebdTryBeg == block) {
+ if (ehTryDsc->ebdTryBeg == block)
+ {
goto CHECK_HND;
-}
+ }
// You can jump within the same try region
- if (bbInTryRegions(block->getTryIndex(), blockPred)) {
+ if (bbInTryRegions(block->getTryIndex(), blockPred))
+ {
goto CHECK_HND;
-}
+ }
// The catch block can jump back into the middle of the try
- if (bbInCatchHandlerRegions(block, blockPred)) {
+ if (bbInCatchHandlerRegions(block, blockPred))
+ {
goto CHECK_HND;
-}
+ }
// The end of a finally region is a BBJ_EHFINALLYRET block (during importing, BBJ_LEAVE) which
// is marked as "returning" to the BBJ_ALWAYS block following the BBJ_CALLFINALLY
// block that does a local call to the finally. This BBJ_ALWAYS is within
// the try region protected by the finally (for x86, ARM), but that's ok.
- if (prevBlock->bbJumpKind == BBJ_CALLFINALLY &&
- block->bbJumpKind == BBJ_ALWAYS &&
- blockPred->bbJumpKind == BBJ_EHFINALLYRET) {
+ if (prevBlock->bbJumpKind == BBJ_CALLFINALLY && block->bbJumpKind == BBJ_ALWAYS &&
+ blockPred->bbJumpKind == BBJ_EHFINALLYRET)
+ {
goto CHECK_HND;
-}
+ }
- printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum,
+ block->bbNum);
noway_assert(!"Jump into middle of try region");
}
-CHECK_HND:;
+ CHECK_HND:;
EHblkDsc* ehHndDsc = ehGetBlockHndDsc(block);
if (ehHndDsc != nullptr)
{
// You can do a BBJ_EHFINALLYRET or BBJ_EHFILTERRET into a handler region
- if ( (blockPred->bbJumpKind == BBJ_EHFINALLYRET)
- || (blockPred->bbJumpKind == BBJ_EHFILTERRET)) {
+ if ((blockPred->bbJumpKind == BBJ_EHFINALLYRET) || (blockPred->bbJumpKind == BBJ_EHFILTERRET))
+ {
goto CHECK_JUMP;
-}
+ }
// Our try block can call our finally block
- if ((block->bbCatchTyp == BBCT_FINALLY) &&
- (blockPred->bbJumpKind == BBJ_CALLFINALLY) &&
+ if ((block->bbCatchTyp == BBCT_FINALLY) && (blockPred->bbJumpKind == BBJ_CALLFINALLY) &&
ehCallFinallyInCorrectRegion(blockPred, block->getHndIndex()))
{
goto CHECK_JUMP;
}
// You can jump within the same handler region
- if (bbInHandlerRegions(block->getHndIndex(), blockPred)) {
+ if (bbInHandlerRegions(block->getHndIndex(), blockPred))
+ {
goto CHECK_JUMP;
-}
+ }
// A filter can jump to the start of the filter handler
- if (ehHndDsc->HasFilter()) {
+ if (ehHndDsc->HasFilter())
+ {
goto CHECK_JUMP;
-}
+ }
- printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum,
+ block->bbNum);
noway_assert(!"Jump into the middle of handler region");
}
-CHECK_JUMP:;
+ CHECK_JUMP:;
switch (blockPred->bbJumpKind)
{
- case BBJ_COND:
- noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block);
- break;
+ case BBJ_COND:
+ noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block);
+ break;
- case BBJ_NONE:
- noway_assert(blockPred->bbNext == block);
- break;
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
- case BBJ_CALLFINALLY:
- case BBJ_ALWAYS:
- case BBJ_EHCATCHRET:
- case BBJ_EHFILTERRET:
- noway_assert(blockPred->bbJumpDest == block);
- break;
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
- case BBJ_EHFINALLYRET:
+ case BBJ_EHFINALLYRET:
{
// If the current block is a successor to a BBJ_EHFINALLYRET (return from finally),
// then the lexically previous block should be a call to the same finally.
// Verify all of that.
- unsigned hndIndex = blockPred->getHndIndex();
- EHblkDsc* ehDsc = ehGetDsc(hndIndex);
- BasicBlock* finBeg = ehDsc->ebdHndBeg;
+ unsigned hndIndex = blockPred->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
// Because there is no bbPrev, we have to search for the lexically previous
// block. We can shorten the search by only looking in places where it is legal
@@ -20094,13 +20140,15 @@ CHECK_JUMP:;
for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
{
- if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
continue;
-}
+ }
- if (block == bcall->bbNext) {
+ if (block == bcall->bbNext)
+ {
goto PRED_OK;
- }
+ }
}
#if FEATURE_EH_FUNCLETS
@@ -20114,19 +20162,22 @@ CHECK_JUMP:;
for (BasicBlock* bcall = fgFirstFuncletBB; bcall; bcall = bcall->bbNext)
{
- if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
continue;
-}
+ }
- if (block != bcall->bbNext) {
+ if (block != bcall->bbNext)
+ {
continue;
-}
+ }
- if (ehCallFinallyInCorrectRegion(bcall, hndIndex)) {
+ if (ehCallFinallyInCorrectRegion(bcall, hndIndex))
+ {
goto PRED_OK;
+ }
}
}
- }
#endif // FEATURE_EH_FUNCLETS
@@ -20134,34 +20185,34 @@ CHECK_JUMP:;
}
break;
- case BBJ_THROW:
- case BBJ_RETURN:
- noway_assert(!"THROW and RETURN block cannot be in the predecessor list!");
- break;
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ noway_assert(!"THROW and RETURN block cannot be in the predecessor list!");
+ break;
- case BBJ_SWITCH:
- unsigned jumpCnt; jumpCnt = blockPred->bbJumpSwt->bbsCount;
- BasicBlock** jumpTab; jumpTab = blockPred->bbJumpSwt->bbsDstTab;
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = blockPred->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = blockPred->bbJumpSwt->bbsDstTab;
- do
- {
- if (block == *jumpTab)
+ do
{
- goto PRED_OK;
- }
- }
- while (++jumpTab, --jumpCnt);
+ if (block == *jumpTab)
+ {
+ goto PRED_OK;
+ }
+ } while (++jumpTab, --jumpCnt);
- noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!");
- break;
+ noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!");
+ break;
- default:
- noway_assert(!"Unexpected bbJumpKind");
- break;
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
}
-PRED_OK:;
-
+ PRED_OK:;
}
/* Check the bbRefs */
@@ -20200,7 +20251,7 @@ PRED_OK:;
copiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
#else // JIT32_GCENCODER
copiedForGenericsCtxt = FALSE;
-#endif // JIT32_GCENCODER
+#endif // JIT32_GCENCODER
    // This 'if' is only in support of the noway_asserts it contains.
if (info.compIsStatic)
@@ -20213,16 +20264,18 @@ PRED_OK:;
// For instance method:
assert(info.compThisArg != BAD_VAR_NUM);
bool compThisArgAddrExposedOK = !lvaTable[info.compThisArg].lvAddrExposed;
+
#ifndef JIT32_GCENCODER
compThisArgAddrExposedOK = compThisArgAddrExposedOK || copiedForGenericsCtxt;
-#endif // !JIT32_GCENCODER
- noway_assert(compThisArgAddrExposedOK && // should never expose the address of arg 0 or
- !lvaTable[info.compThisArg].lvArgWrite && // write to arg 0.
- ( // In addition,
- lvaArg0Var == info.compThisArg || // lvArg0Var should remain 0 if arg0 is not written to or address-exposed.
- lvaArg0Var != info.compThisArg &&
- (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)
- ));
+#endif // !JIT32_GCENCODER
+
+ // Should never expose the address of arg 0 or write to arg 0.
+ // In addition, lvArg0Var should remain 0 if arg0 is not
+ // written to or address-exposed.
+ noway_assert(compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvArgWrite &&
+ (lvaArg0Var == info.compThisArg ||
+ lvaArg0Var != info.compThisArg && (lvaTable[lvaArg0Var].lvAddrExposed ||
+ lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)));
}
}
@@ -20232,40 +20285,40 @@ PRED_OK:;
*
****************************************************************************/
-void Compiler::fgDebugCheckFlags(GenTreePtr tree)
+void Compiler::fgDebugCheckFlags(GenTreePtr tree)
{
noway_assert(tree->gtOper != GT_STMT);
- genTreeOps oper = tree->OperGet();
- unsigned kind = tree->OperKind();
- unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT;
- unsigned chkFlags = 0;
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+ unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT;
+ unsigned chkFlags = 0;
/* Is this a leaf node? */
- if (kind & GTK_LEAF)
+ if (kind & GTK_LEAF)
{
switch (oper)
{
- case GT_CLS_VAR:
- chkFlags |= GTF_GLOB_REF;
- break;
+ case GT_CLS_VAR:
+ chkFlags |= GTF_GLOB_REF;
+ break;
- case GT_CATCH_ARG:
- chkFlags |= GTF_ORDER_SIDEEFF;
- break;
+ case GT_CATCH_ARG:
+ chkFlags |= GTF_ORDER_SIDEEFF;
+ break;
- default:
- break;
+ default:
+ break;
}
}
/* Is it a 'simple' unary/binary operator? */
- else if (kind & GTK_SMPOP)
+ else if (kind & GTK_SMPOP)
{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
// During GS work, we make shadow copies for params.
// In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
@@ -20275,48 +20328,88 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
// TYP_INT up to the GT_ASG tree is only correct if we don't need to propagate the TYP_INT back up.
// The following checks will ensure this.
- // Is the left child of "tree" a GT_ASG?,
+ // Is the left child of "tree" a GT_ASG?
+ //
+        // If parent is a TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
+ // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will
+ // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine.
if (op1 && op1->gtOper == GT_ASG)
{
- assert(tree->gtType == TYP_VOID || // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine.
- tree->gtOper == GT_COMMA); // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will
- } // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine.
+ assert(tree->gtType == TYP_VOID || tree->gtOper == GT_COMMA);
+ }
- // Is the right child of "tree" a GT_ASG?,
+ // Is the right child of "tree" a GT_ASG?
+ //
+        // If parent is a TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
if (op2 && op2->gtOper == GT_ASG)
{
- assert(tree->gtType == TYP_VOID); // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine.
+ assert(tree->gtType == TYP_VOID);
}
switch (oper)
{
- case GT_QMARK:
- if (op1->OperIsCompare())
- {
- noway_assert(op1->gtFlags & GTF_DONT_CSE);
- }
- else
- {
- noway_assert( (op1->gtOper == GT_CNS_INT) &&
- ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1)) );
- }
- break;
+ case GT_QMARK:
+ if (op1->OperIsCompare())
+ {
+ noway_assert(op1->gtFlags & GTF_DONT_CSE);
+ }
+ else
+ {
+ noway_assert((op1->gtOper == GT_CNS_INT) &&
+ ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1)));
+ }
+ break;
- default:
- break;
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ if ((op2 != nullptr) && op2->OperIsAnyList())
+ {
+ ArrayStack<GenTree*> stack(this);
+ while ((tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsAnyList())
+ {
+ stack.Push(tree);
+ tree = tree->gtGetOp2();
+ }
+
+ fgDebugCheckFlags(tree);
+
+ while (stack.Height() > 0)
+ {
+ tree = stack.Pop();
+ assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+ fgDebugCheckFlags(tree->gtOp.gtOp1);
+ chkFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ chkFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlagsHelper(tree, (tree->gtFlags & GTF_ALL_EFFECT), chkFlags);
+ }
+
+ return;
+ }
+ break;
+
+ default:
+ break;
}
/* Recursively check the subtrees */
- if (op1) { fgDebugCheckFlags(op1);
-}
- if (op2) { fgDebugCheckFlags(op2);
-}
+ if (op1)
+ {
+ fgDebugCheckFlags(op1);
+ }
+ if (op2)
+ {
+ fgDebugCheckFlags(op2);
+ }
- if (op1) { chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
-}
- if (op2) { chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
-}
+ if (op1)
+ {
+ chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+ }
+ if (op2)
+ {
+ chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+ }
// We reuse the value of GTF_REVERSE_OPS for a GT_IND-specific flag,
// so exempt that (unary) operator.
@@ -20331,7 +20424,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
was set and thus GTF_ASG cannot be considered here. */
/* For a GT_ASG(GT_IND(x), y) we are interested in the side effects of x */
- GenTreePtr op1p;
+ GenTreePtr op1p;
if ((kind & GTK_ASGOP) && (op1->gtOper == GT_IND))
{
op1p = op1->gtOp.gtOp1;
@@ -20355,20 +20448,18 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
if (kind & GTK_ASGOP)
{
- chkFlags |= GTF_ASG;
+ chkFlags |= GTF_ASG;
}
/* Note that it is OK for treeFlags not to have a GTF_EXCEPT,
AssertionProp's non-Null may have cleared it */
if (tree->OperMayThrow())
{
- chkFlags |= (treeFlags & GTF_EXCEPT);
+ chkFlags |= (treeFlags & GTF_EXCEPT);
}
- if (oper == GT_ADDR &&
- (op1->OperIsLocal() ||
- op1->gtOper == GT_CLS_VAR ||
- (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR)))
+ if (oper == GT_ADDR && (op1->OperIsLocal() || op1->gtOper == GT_CLS_VAR ||
+ (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR)))
{
            /* &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does.
Similarly for clsVar */
@@ -20378,131 +20469,149 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
/* See what kind of a special operator we have here */
- else { switch (tree->OperGet())
+ else
{
- case GT_CALL:
+ switch (tree->OperGet())
+ {
+ case GT_CALL:
- GenTreePtr args;
- GenTreePtr argx;
- GenTreeCall* call;
-
- call = tree->AsCall();
+ GenTreePtr args;
+ GenTreePtr argx;
+ GenTreeCall* call;
- chkFlags |= GTF_CALL;
+ call = tree->AsCall();
- if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
- {
- switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
- {
- // Is this a helper call that can throw an exception ?
- case CORINFO_HELP_LDIV:
- case CORINFO_HELP_LMOD:
- case CORINFO_HELP_METHOD_ACCESS_CHECK:
- case CORINFO_HELP_FIELD_ACCESS_CHECK:
- case CORINFO_HELP_CLASS_ACCESS_CHECK:
- case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
- chkFlags |= GTF_EXCEPT;
- break;
- default:
- break;
- }
- }
+ chkFlags |= GTF_CALL;
- if (call->gtCallObjp)
- {
- fgDebugCheckFlags(call->gtCallObjp);
- chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT);
+ if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
+ {
+ switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
+ {
+ // Is this a helper call that can throw an exception ?
+ case CORINFO_HELP_LDIV:
+ case CORINFO_HELP_LMOD:
+ case CORINFO_HELP_METHOD_ACCESS_CHECK:
+ case CORINFO_HELP_FIELD_ACCESS_CHECK:
+ case CORINFO_HELP_CLASS_ACCESS_CHECK:
+ case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
+ chkFlags |= GTF_EXCEPT;
+ break;
+ default:
+ break;
+ }
+ }
- if (call->gtCallObjp->gtFlags & GTF_ASG)
- {
- treeFlags |= GTF_ASG;
- }
- }
+ if (call->gtCallObjp)
+ {
+ fgDebugCheckFlags(call->gtCallObjp);
+ chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT);
- for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
- {
- argx = args->gtOp.gtOp1;
- fgDebugCheckFlags(argx);
+ if (call->gtCallObjp->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
- chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
- if (argx->gtFlags & GTF_ASG)
- {
- treeFlags |= GTF_ASG;
- }
- }
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
- for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2)
- {
- argx = args->gtOp.gtOp1;
- fgDebugCheckFlags(argx);
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
- chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+ for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
- if (argx->gtFlags & GTF_ASG)
- {
- treeFlags |= GTF_ASG;
- }
- }
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
- if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
- {
- fgDebugCheckFlags(call->gtCallCookie);
- chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT);
- }
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
- if (call->gtCallType == CT_INDIRECT)
- {
- fgDebugCheckFlags(call->gtCallAddr);
- chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT);
- }
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgDebugCheckFlags(call->gtCallCookie);
+ chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT);
+ }
- if (call->IsUnmanaged() &&
- (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL))
- {
- if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP)
- {
- noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
- call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
- }
- else
- {
- noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
- call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
- }
- }
- break;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ fgDebugCheckFlags(call->gtCallAddr);
+ chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT);
+ }
- case GT_ARR_ELEM:
+ if (call->IsUnmanaged() && (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL))
+ {
+ if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP)
+ {
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ else
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ }
+ break;
- GenTreePtr arrObj;
- unsigned dim;
+ case GT_ARR_ELEM:
- arrObj = tree->gtArrElem.gtArrObj;
- fgDebugCheckFlags(arrObj);
- chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT);
+ GenTreePtr arrObj;
+ unsigned dim;
- for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
- {
- fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]);
- chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
- }
- break;
+ arrObj = tree->gtArrElem.gtArrObj;
+ fgDebugCheckFlags(arrObj);
+ chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT);
- case GT_ARR_OFFSET:
- fgDebugCheckFlags(tree->gtArrOffs.gtOffset);
- chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT);
- fgDebugCheckFlags(tree->gtArrOffs.gtIndex);
- chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT);
- fgDebugCheckFlags(tree->gtArrOffs.gtArrObj);
- chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT);
- break;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]);
+ chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ break;
- default:
- break;
+ case GT_ARR_OFFSET:
+ fgDebugCheckFlags(tree->gtArrOffs.gtOffset);
+ chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtIndex);
+ chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtArrObj);
+ chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT);
+ break;
+
+ default:
+ break;
+ }
}
+
+ fgDebugCheckFlagsHelper(tree, treeFlags, chkFlags);
}
+//------------------------------------------------------------------------------
+// fgDebugCheckFlagsHelper : Check if all bits that are set in chkFlags are also set in treeFlags.
+//
+//
+// Arguments:
+// tree - Tree whose flags are being checked
+// treeFlags - Actual flags on the tree
+// chkFlags - Expected flags
+//
+// Note:
+// Checking that all bits that are set in treeFlags are also set in chkFlags is currently disabled.
+
+void Compiler::fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags)
+{
if (chkFlags & ~treeFlags)
{
// Print the tree so we can see it in the log.
@@ -20524,12 +20633,12 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
#if 0
// TODO-Cleanup:
/* The tree has extra flags set. However, this will happen if we
- replace a subtree with something, but don't clear the flags up
- the tree. Can't flag this unless we start clearing flags above.
+ replace a subtree with something, but don't clear the flags up
+ the tree. Can't flag this unless we start clearing flags above.
- Note: we need this working for GTF_CALL and CSEs, so I'm enabling
- it for calls.
- */
+ Note: we need this working for GTF_CALL and CSEs, so I'm enabling
+ it for calls.
+ */
if (tree->OperGet() != GT_CALL && (treeFlags & GTF_CALL) && !(chkFlags & GTF_CALL))
{
// Print the tree so we can see it in the log.
@@ -20545,7 +20654,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE);
printf("\n");
gtDispTree(tree);
- }
+ }
#endif // 0
}
}
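To make the helper's contract concrete, the sketch below (plain C++ with ordinary unsigned masks, not the JIT's GenTree types) shows the subset test it performs: any bit expected in chkFlags but absent from treeFlags is reported, while the reverse direction stays disabled as noted above:

    // Minimal sketch of the flag-subset check performed by the helper.
    #include <cassert>

    static bool MissingExpectedFlags(unsigned treeFlags, unsigned chkFlags)
    {
        // Bits set in chkFlags but clear in treeFlags indicate missing effect flags.
        return (chkFlags & ~treeFlags) != 0;
    }

    int main()
    {
        assert(!MissingExpectedFlags(0x7, 0x5)); // expected bits are all present: ok
        assert(MissingExpectedFlags(0x4, 0x5));  // bit 0x1 expected but not set: reported
        return 0;
    }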
@@ -20569,14 +20678,13 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
noway_assert(stmt->gtStmtList);
// The first node's gtPrev must be nullptr (the gtPrev list is not circular).
- // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below terminates.
+ // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below
+ // terminates.
assert(stmt->gtStmtList->gtPrev == nullptr);
- for (GenTreePtr tree = stmt->gtStmtList;
- tree != nullptr;
- tree = tree->gtNext)
+ for (GenTreePtr tree = stmt->gtStmtList; tree != nullptr; tree = tree->gtNext)
{
- if (tree->gtPrev)
+ if (tree->gtPrev)
{
noway_assert(tree->gtPrev->gtNext == tree);
}
@@ -20585,7 +20693,7 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
noway_assert(tree == stmt->gtStmtList);
}
- if (tree->gtNext)
+ if (tree->gtNext)
{
noway_assert(tree->gtNext->gtPrev == tree);
}
@@ -20621,40 +20729,40 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
{
switch (tree->gtOper)
{
- case GT_QMARK:
- expectedPrevTree = tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second).
- break;
+ case GT_QMARK:
+ expectedPrevTree =
+ tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second).
+ break;
- case GT_COLON:
- expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first).
- break;
+ case GT_COLON:
+ expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first).
+ break;
- default:
- if (tree->gtOp.gtOp2)
- {
- if (tree->gtFlags & GTF_REVERSE_OPS)
+ default:
+ if (tree->gtOp.gtOp2)
{
- expectedPrevTree = tree->gtOp.gtOp1;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ expectedPrevTree = tree->gtOp.gtOp2;
+ }
}
else
{
- expectedPrevTree = tree->gtOp.gtOp2;
+ expectedPrevTree = tree->gtOp.gtOp1;
}
- }
- else
- {
- expectedPrevTree = tree->gtOp.gtOp1;
- }
- break;
+ break;
}
}
- noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node
- tree->gtPrev == expectedPrevTree); // The "normal" case
+ noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node
+ tree->gtPrev == expectedPrevTree); // The "normal" case
}
}
-
/*****************************************************************************
*
* A DEBUG routine to check the correctness of the links between GT_STMT nodes
@@ -20662,15 +20770,14 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
*
****************************************************************************/
-void Compiler::fgDebugCheckLinks(bool morphTrees)
+void Compiler::fgDebugCheckLinks(bool morphTrees)
{
// This used to be only on for stress, and there was a comment stating that
// it was "quite an expensive operation" but I did not find that to be true.
// Set DO_SANITY_DEBUG_CHECKS to false to revert to that behavior.
const bool DO_SANITY_DEBUG_CHECKS = true;
- if (!DO_SANITY_DEBUG_CHECKS &&
- !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
+ if (!DO_SANITY_DEBUG_CHECKS && !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
{
return;
}
@@ -20680,7 +20787,7 @@ void Compiler::fgDebugCheckLinks(bool morphTrees)
/* For each basic block check the bbTreeList links */
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
-PROCESS_BLOCK_AGAIN:;
+ PROCESS_BLOCK_AGAIN:;
if (block->IsLIR())
{
LIR::AsRange(block).CheckLIR(this);
@@ -20690,11 +20797,12 @@ PROCESS_BLOCK_AGAIN:;
for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
/* Verify that bbTreeList is threaded correctly */
- /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the last GT_STMT in a block is nullptr. */
+ /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the
+ * last GT_STMT in a block is nullptr. */
noway_assert(stmt->gtPrev);
- if (stmt == block->bbTreeList)
+ if (stmt == block->bbTreeList)
{
noway_assert(stmt->gtPrev->gtNext == nullptr);
}
@@ -20703,7 +20811,7 @@ PROCESS_BLOCK_AGAIN:;
noway_assert(stmt->gtPrev->gtNext == stmt);
}
- if (stmt->gtNext)
+ if (stmt->gtNext)
{
noway_assert(stmt->gtNext->gtPrev == stmt);
}
@@ -20782,9 +20890,9 @@ void Compiler::fgDebugCheckBlockLinks()
// Create a set with all the successors. Don't use BlockSet, so we don't need to worry
// about the BlockSet epoch.
BitVecTraits bitVecTraits(fgBBNumMax + 1, this);
- BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits));
+ BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits));
BasicBlock** jumpTable = block->bbJumpSwt->bbsDstTab;
- unsigned jumpCount = block->bbJumpSwt->bbsCount;
+ unsigned jumpCount = block->bbJumpSwt->bbsCount;
for (unsigned i = 0; i < jumpCount; i++)
{
BitVecOps::AddElemD(&bitVecTraits, succBlocks, jumpTable[i]->bbNum);
@@ -20822,10 +20930,10 @@ void Compiler::fgDebugCheckBlockLinks()
// Likewise the depth limit is a policy consideration, and serves mostly
// as a safeguard to prevent runaway inlining of small methods.
-unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo)
+unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo)
{
BYTE* candidateCode = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
- InlineContext* inlineContext = inlineInfo->iciStmt->gtStmt.gtInlineContext;
+ InlineContext* inlineContext = inlineInfo->iciStmt->gtInlineContext;
InlineResult* inlineResult = inlineInfo->inlineResult;
// There should be a context for all candidates.
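The depth and recursion policy described above can be pictured with the hypothetical sketch below; the Ctx type and its fields are illustrative stand-ins, not the JIT's InlineContext API:

    // Hypothetical sketch of the inline depth/recursion walk (illustrative types only).
    struct Ctx
    {
        const unsigned char* ilCode; // IL body inlined at this level
        const Ctx*           parent; // enclosing inline context, nullptr at the root
    };

    // Returns the nesting depth of the candidate, or 0 if its IL body already
    // appears in an ancestor context (a recursive inline attempt).
    unsigned CheckDepthAndRecursion(const unsigned char* candidateCode, const Ctx* ctx)
    {
        unsigned depth = 0;
        for (; ctx != nullptr; ctx = ctx->parent)
        {
            ++depth;
            if (ctx->ilCode == candidateCode)
            {
                return 0; // recursion detected
            }
        }
        return depth; // caller compares this against the policy's depth limit
    }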
@@ -20860,17 +20968,18 @@ unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo)
* Inlining phase
*/
-
-void Compiler::fgInline()
+void Compiler::fgInline()
{
- if (!opts.OptEnabled(CLFLG_INLINING)) {
+ if (!opts.OptEnabled(CLFLG_INLINING))
+ {
return;
-}
+ }
#ifdef DEBUG
- if (verbose) {
+ if (verbose)
+ {
printf("*************** In fgInline()\n");
-}
+ }
#endif // DEBUG
BasicBlock* block = fgFirstBB;
@@ -20881,9 +20990,7 @@ void Compiler::fgInline()
for (; block != nullptr; block = block->bbNext)
{
- for (GenTreeStmt* stmt = block->firstStmt();
- stmt;
- stmt = stmt->gtNextStmt)
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
stmt->gtInlineContext = rootContext;
}
@@ -20901,9 +21008,7 @@ void Compiler::fgInline()
GenTreeStmt* stmt;
GenTreePtr expr;
- for (stmt = block->firstStmt();
- stmt != nullptr;
- stmt = stmt->gtNextStmt)
+ for (stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
{
expr = stmt->gtStmtExpr;
@@ -20932,14 +21037,11 @@ void Compiler::fgInline()
}
// See if we need to replace the return value place holder.
- fgWalkTreePre(&stmt->gtStmtExpr,
- fgUpdateInlineReturnExpressionPlaceHolder,
- (void *) this);
+ fgWalkTreePre(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, (void*)this);
// See if stmt is of the form GT_COMMA(call, nop)
- // If yes, we can get rid of GT_COMMA.
- if (expr->OperGet() == GT_COMMA &&
- expr->gtOp.gtOp1->OperGet() == GT_CALL &&
+ // If yes, we can get rid of GT_COMMA.
+ if (expr->OperGet() == GT_COMMA && expr->gtOp.gtOp1->OperGet() == GT_CALL &&
expr->gtOp.gtOp2->OperGet() == GT_NOP)
{
stmt->gtStmtExpr = expr->gtOp.gtOp1;
@@ -20961,9 +21063,7 @@ void Compiler::fgInline()
{
GenTreeStmt* stmt;
- for (stmt = block->firstStmt();
- stmt;
- stmt = stmt->gtNextStmt)
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
// Call Compiler::fgDebugCheckInlineCandidates on each node
fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckInlineCandidates);
@@ -20975,17 +21075,17 @@ void Compiler::fgInline()
fgVerifyHandlerTab();
- if (verbose)
+ if (verbose)
{
printf("*************** After fgInline()\n");
fgDispBasicBlocks(true);
fgDispHandlerTab();
}
- if (verbose || fgPrintInlinedMethods)
+ if (verbose || fgPrintInlinedMethods)
{
- printf("**************** Inline Tree\n");
- m_inlineStrategy->Dump();
+ printf("**************** Inline Tree\n");
+ m_inlineStrategy->Dump();
}
#endif // DEBUG
@@ -21007,14 +21107,13 @@ void Compiler::fgInline()
// Note:
// Invokes fgNoteNonInlineCandidate on the nodes it finds.
-Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
if (tree->gtOper == GT_CALL)
{
Compiler* compiler = data->compiler;
- GenTreePtr stmt = (GenTreePtr) data->pCallbackData;
+ GenTreeStmt* stmt = (GenTreeStmt*)data->pCallbackData;
GenTreeCall* call = tree->AsCall();
compiler->fgNoteNonInlineCandidate(stmt, call);
@@ -21027,17 +21126,16 @@ Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree
// not marked as inline candidates.
//
// Arguments:
-// tree - statement containing the call
+// stmt - statement containing the call
// call - the call itself
//
// Notes:
// Used in debug only to try and place descriptions of inline failures
// into the proper context in the inline tree.
-void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree,
- GenTreeCall* call)
+void Compiler::fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call)
{
- InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate");
+ InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate");
InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE;
// Try and recover the reason left behind when the jit decided
@@ -21070,7 +21168,7 @@ void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree,
if (call->gtCallType == CT_USER_FUNC)
{
// Create InlineContext for the failure
- m_inlineStrategy->NewFailure(tree, &inlineResult);
+ m_inlineStrategy->NewFailure(stmt, &inlineResult);
}
}
@@ -21088,12 +21186,8 @@ void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree,
*/
GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
{
- noway_assert((tree->gtOper == GT_LCL_VAR) ||
- (tree->gtOper == GT_FIELD) ||
- (tree->gtOper == GT_IND) ||
- (tree->gtOper == GT_BLK) ||
- (tree->gtOper == GT_OBJ) ||
- tree->OperIsSIMD() ||
+ noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) ||
+ (tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() ||
// tree->gtOper == GT_CALL || cannot get address of call.
// tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode.
// tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
@@ -21101,18 +21195,18 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
switch (tree->OperGet())
{
- case GT_BLK:
- case GT_OBJ:
- case GT_IND:
- return tree->gtOp.gtOp1;
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ return tree->gtOp.gtOp1;
- case GT_COMMA:
- tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2);
- tree->gtType = TYP_BYREF;
- return tree;
+ case GT_COMMA:
+ tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2);
+ tree->gtType = TYP_BYREF;
+ return tree;
- default:
- return gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ default:
+ return gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
}
}
@@ -21137,15 +21231,15 @@ GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_
// we have a ", , , call()" -- this is very defensive as we may never get
// an inlinee that is made of commas. If the inlinee is not a call, then
// we use a copy block to do the assignment.
- GenTreePtr src = child;
- GenTreePtr lastComma = NULL;
+ GenTreePtr src = child;
+ GenTreePtr lastComma = nullptr;
while (src->gtOper == GT_COMMA)
{
lastComma = src;
- src = src->gtOp.gtOp2;
+ src = src->gtOp.gtOp2;
}
- GenTreePtr newInlinee = NULL;
+ GenTreePtr newInlinee = nullptr;
if (src->gtOper == GT_CALL)
{
// If inlinee was just a call, new inlinee is v05 = call()
@@ -21162,16 +21256,16 @@ GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_
if (child->gtOper == GT_COMMA)
{
lastComma->gtOp.gtOp2 = newInlinee;
- newInlinee = child;
+ newInlinee = child;
}
}
else
{
// Inlinee is not a call, so just create a copy block to the tmp.
- src = child;
+ src = child;
GenTreePtr dstAddr = fgGetStructAsStructPtr(dst);
GenTreePtr srcAddr = fgGetStructAsStructPtr(src);
- newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false);
+ newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false);
}
GenTreePtr production = gtNewLclvNode(tmpNum, structType);
@@ -21197,15 +21291,17 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR
assert(tree->gtOper == GT_ASG);
// We have an assignment, we codegen only V05 = call().
- if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ // However, if it is a multireg return on x64/ux we want to assign it to a temp.
+ if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR && !child->AsCall()->HasMultiRegRetVal())
{
return;
}
GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1);
- GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL)
- ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
- : child); // Just get the address, if not a call.
+ GenTreePtr srcAddr = fgGetStructAsStructPtr(
+ (child->gtOper == GT_CALL)
+ ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
+ : child); // Just get the address, if not a call.
tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this);
}
@@ -21217,16 +21313,15 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR
*/
/* static */
-Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree, fgWalkData* data)
{
- GenTreePtr tree = *pTree;
- Compiler* comp = data->compiler;
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE;
if (tree->gtOper == GT_RET_EXPR)
{
- // We are going to copy the tree from the inlinee,
+ // We are going to copy the tree from the inlinee,
// so record the handle now.
//
if (varTypeIsStruct(tree))
@@ -21242,7 +21337,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
#ifdef DEBUG
if (comp->verbose)
{
- printf("\nReplacing the return expression placeholder ");
+ printf("\nReplacing the return expression placeholder ");
printTreeID(tree);
printf(" with ");
printTreeID(inlineCandidate);
@@ -21252,7 +21347,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
}
#endif // DEBUG
- tree->CopyFrom(inlineCandidate, comp);
+ tree->CopyFrom(inlineCandidate, comp);
#ifdef DEBUG
if (comp->verbose)
@@ -21262,8 +21357,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
printf("\n");
}
#endif // DEBUG
- }
- while (tree->gtOper == GT_RET_EXPR);
+ } while (tree->gtOper == GT_RET_EXPR);
}
#if FEATURE_MULTIREG_RET
@@ -21305,15 +21399,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp2->gtOper == GT_COMMA))
{
GenTreePtr comma;
- for (comma = tree->gtOp.gtOp2;
- comma->gtOper == GT_COMMA;
- comma = comma->gtOp.gtOp2)
+ for (comma = tree->gtOp.gtOp2; comma->gtOper == GT_COMMA; comma = comma->gtOp.gtOp2)
{
// empty
}
- noway_assert(!varTypeIsStruct(comma) ||
- comma->gtOper != GT_RET_EXPR ||
+ noway_assert(!varTypeIsStruct(comma) || comma->gtOper != GT_RET_EXPR ||
!comp->IsMultiRegReturnedType(comma->gtRetExpr.gtRetClsHnd));
}
@@ -21330,8 +21421,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(
*/
/* static */
-Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
if (tree->gtOper == GT_CALL)
@@ -21348,9 +21438,7 @@ Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* p
#endif // DEBUG
-
-void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
- InlineResult* inlineResult)
+void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineResult)
{
noway_assert(call->gtOper == GT_CALL);
noway_assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
@@ -21393,92 +21481,95 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
// Set the trap to catch all errors (including recoverable ones from the EE)
struct Param
{
- Compiler* pThis;
- GenTree* call;
+ Compiler* pThis;
+ GenTree* call;
CORINFO_METHOD_HANDLE fncHandle;
- InlineCandidateInfo* inlineCandidateInfo;
- InlineInfo* inlineInfo;
+ InlineCandidateInfo* inlineCandidateInfo;
+ InlineInfo* inlineInfo;
} param = {nullptr};
- param.pThis = this;
- param.call = call;
- param.fncHandle = fncHandle;
+ param.pThis = this;
+ param.call = call;
+ param.fncHandle = fncHandle;
param.inlineCandidateInfo = inlineCandidateInfo;
- param.inlineInfo = &inlineInfo;
- bool success = eeRunWithErrorTrap<Param>([](Param* pParam)
- {
- // Init the local var info of the inlinee
- pParam->pThis->impInlineInitVars(pParam->inlineInfo);
+ param.inlineInfo = &inlineInfo;
+ bool success = eeRunWithErrorTrap<Param>(
+ [](Param* pParam) {
+ // Init the local var info of the inlinee
+ pParam->pThis->impInlineInitVars(pParam->inlineInfo);
- if (pParam->inlineInfo->inlineResult->IsCandidate())
- {
- /* Clear the temp table */
- memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
+ if (pParam->inlineInfo->inlineResult->IsCandidate())
+ {
+ /* Clear the temp table */
+ memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
- //
- // Prepare the call to jitNativeCode
- //
+ //
+ // Prepare the call to jitNativeCode
+ //
- pParam->inlineInfo->InlinerCompiler = pParam->pThis;
- if (pParam->pThis->impInlineInfo == nullptr)
- {
- pParam->inlineInfo->InlineRoot = pParam->pThis;
- }
- else
- {
- pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
- }
- pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
- pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
+ pParam->inlineInfo->InlinerCompiler = pParam->pThis;
+ if (pParam->pThis->impInlineInfo == nullptr)
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis;
+ }
+ else
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
+ }
+ pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
+ pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
- JITLOG_THIS(pParam->pThis,
- (LL_INFO100000,
- "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
- pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
- pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
+ JITLOG_THIS(pParam->pThis,
+ (LL_INFO100000, "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
+ pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
- CORJIT_FLAGS compileFlagsForInlinee;
- memcpy(&compileFlagsForInlinee, pParam->pThis->opts.jitFlags, sizeof(compileFlagsForInlinee));
- compileFlagsForInlinee.corJitFlags &= ~CORJIT_FLG_LOST_WHEN_INLINING;
- compileFlagsForInlinee.corJitFlags |= CORJIT_FLG_SKIP_VERIFICATION;
+ JitFlags compileFlagsForInlinee = *pParam->pThis->opts.jitFlags;
+
+ // The following flags are lost when inlining.
+ // (This is checked in Compiler::compInitOptions().)
+ compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBOPT);
+ compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR);
+ compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
+ compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC);
+ compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_INFO);
+
+ compileFlagsForInlinee.Set(JitFlags::JIT_FLAG_SKIP_VERIFICATION);
#ifdef DEBUG
- if (pParam->pThis->verbose)
- {
- printf("\nInvoking compiler for the inlinee method %s :\n",
- pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
- }
+ if (pParam->pThis->verbose)
+ {
+ printf("\nInvoking compiler for the inlinee method %s :\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
+ }
#endif // DEBUG
- int result = jitNativeCode(pParam->fncHandle,
- pParam->inlineCandidateInfo->methInfo.scope,
- pParam->pThis->info.compCompHnd,
- &pParam->inlineCandidateInfo->methInfo,
- (void**)pParam->inlineInfo,
- nullptr,
- &compileFlagsForInlinee,
- pParam->inlineInfo);
-
- if (result != CORJIT_OK)
- {
- // If we haven't yet determined why this inline fails, use
- // a catch-all something bad happened observation.
- InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
+ int result =
+ jitNativeCode(pParam->fncHandle, pParam->inlineCandidateInfo->methInfo.scope,
+ pParam->pThis->info.compCompHnd, &pParam->inlineCandidateInfo->methInfo,
+ (void**)pParam->inlineInfo, nullptr, &compileFlagsForInlinee, pParam->inlineInfo);
- if (!innerInlineResult->IsFailure())
+ if (result != CORJIT_OK)
{
- innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
+ // If we haven't yet determined why this inline fails, use
+ // a catch-all something bad happened observation.
+ InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
+
+ if (!innerInlineResult->IsFailure())
+ {
+ innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
+ }
}
}
- }
- }, &param);
+ },
+ &param);
if (!success)
{
#ifdef DEBUG
if (verbose)
{
- printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n",
- eeGetMethodFullName(fncHandle));
+ printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n",
+ eeGetMethodFullName(fncHandle));
}
#endif // DEBUG
@@ -21498,8 +21589,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
#ifdef DEBUG
if (0 && verbose)
{
- printf("\nDone invoking compiler for the inlinee method %s\n",
- eeGetMethodFullName(fncHandle));
+ printf("\nDone invoking compiler for the inlinee method %s\n", eeGetMethodFullName(fncHandle));
}
#endif // DEBUG
@@ -21514,7 +21604,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
if (verbose)
{
printf("\nInlining failed because pInlineInfo->retExpr is not set in the inlinee method %s.\n",
- eeGetMethodFullName(fncHandle));
+ eeGetMethodFullName(fncHandle));
}
#endif // DEBUG
inlineResult->NoteFatal(InlineObservation::CALLEE_LACKS_RETURN);
@@ -21526,7 +21616,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
// we defer the call to initClass() until inlining is completed in case it fails. If inlining succeeds,
// we will call initClass().
if (!(info.compCompHnd->initClass(nullptr /* field */, fncHandle /* method */,
- inlineCandidateInfo->exactContextHnd /* context */) & CORINFO_INITCLASS_INITIALIZED))
+ inlineCandidateInfo->exactContextHnd /* context */) &
+ CORINFO_INITCLASS_INITIALIZED))
{
inlineResult->NoteFatal(InlineObservation::CALLEE_CLASS_INIT_FAILURE);
return;
@@ -21545,11 +21636,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
if (verbose || fgPrintInlinedMethods)
{
- printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n",
- eeGetMethodFullName(fncHandle),
- inlineCandidateInfo->methInfo.ILCodeSize,
- inlineDepth,
- inlineResult->ReasonString());
+ printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n", eeGetMethodFullName(fncHandle),
+ inlineCandidateInfo->methInfo.ILCodeSize, inlineDepth, inlineResult->ReasonString());
}
if (verbose)
@@ -21566,20 +21654,39 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
inlineResult->NoteSuccess();
}
-// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-// The inlining attempt cannot be failed starting from this point.
-// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//------------------------------------------------------------------------
+// fgInsertInlineeBlocks: incorporate statements for an inline into the
+// root method.
+//
+// Arguments:
+// inlineInfo -- info for the inline
+//
+// Notes:
+// The inlining attempt cannot be failed once this method is called.
+//
+// Adds all inlinee statements, plus any glue statements needed
+// either before or after the inlined call.
+//
+// Updates flow graph and assigns weights to inlinee
+// blocks. Currently does not attempt to read IBC data for the
+// inlinee.
+//
+// Updates relevant root method status flags (eg optMethodFlags) to
+// include information from the inlinee.
+//
+// Marks newly added statements with an appropriate inline context.
+
void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
{
- GenTreePtr iciCall = pInlineInfo->iciCall;
- GenTreePtr iciStmt = pInlineInfo->iciStmt;
- BasicBlock* iciBlock = pInlineInfo->iciBlock;
+ GenTreeCall* iciCall = pInlineInfo->iciCall;
+ GenTreeStmt* iciStmt = pInlineInfo->iciStmt;
+ BasicBlock* iciBlock = pInlineInfo->iciBlock;
BasicBlock* block;
     // We can write a better assert here. For example, we can check that
// iciBlock contains iciStmt, which in turn contains iciCall.
noway_assert(iciBlock->bbTreeList != nullptr);
- noway_assert(iciStmt->gtStmt.gtStmtExpr != nullptr);
+ noway_assert(iciStmt->gtStmtExpr != nullptr);
noway_assert(iciCall->gtOper == GT_CALL);
#ifdef DEBUG
@@ -21591,33 +21698,23 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
printf("\n\n----------- Statements (and blocks) added due to the inlining of call ");
printTreeID(iciCall);
printf(" -----------\n");
- // gtDispTree(iciStmt);
}
#endif // DEBUG
- //
// Create a new inline context and mark the inlined statements with it
- //
InlineContext* calleeContext = m_inlineStrategy->NewSuccess(pInlineInfo);
- for (block = InlineeCompiler->fgFirstBB;
- block != nullptr;
- block = block->bbNext)
+ for (block = InlineeCompiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
- for (GenTreeStmt* stmt = block->firstStmt();
- stmt;
- stmt = stmt->gtNextStmt)
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
stmt->gtInlineContext = calleeContext;
}
}
- //
- // Prepend statements.
- //
- GenTreePtr stmtAfter;
- stmtAfter = fgInlinePrependStatements(pInlineInfo);
+ // Prepend statements
+ GenTreePtr stmtAfter = fgInlinePrependStatements(pInlineInfo);
#ifdef DEBUG
if (verbose)
@@ -21627,6 +21724,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
}
#endif // DEBUG
+ BasicBlock* topBlock = iciBlock;
+ BasicBlock* bottomBlock = nullptr;
+
if (InlineeCompiler->fgBBcount == 1)
{
// When fgBBCount is 1 we will always have a non-NULL fgFirstBB
@@ -21641,22 +21741,21 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
// Inlinee contains just one BB. So just insert its statement list to topBlock.
if (InlineeCompiler->fgFirstBB->bbTreeList)
{
- stmtAfter = fgInsertStmtListAfter(iciBlock,
- stmtAfter,
- InlineeCompiler->fgFirstBB->bbTreeList);
+ stmtAfter = fgInsertStmtListAfter(iciBlock, stmtAfter, InlineeCompiler->fgFirstBB->bbTreeList);
// Copy inlinee bbFlags to caller bbFlags.
- const unsigned int inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
+ const unsigned __int64 inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0);
noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0);
iciBlock->bbFlags |= inlineeBlockFlags;
}
+
#ifdef DEBUG
if (verbose)
{
noway_assert(currentDumpStmt);
- if (currentDumpStmt != stmtAfter)
+ if (currentDumpStmt != stmtAfter)
{
do
{
@@ -21669,10 +21768,14 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
gtDispTree(currentDumpStmt);
printf("\n");
- } while (currentDumpStmt != stmtAfter);
+ } while (currentDumpStmt != stmtAfter);
}
}
#endif // DEBUG
+
+ // Append statements to unpin, if necessary.
+ fgInlineAppendStatements(pInlineInfo, iciBlock, stmtAfter);
+
goto _Done;
}
}
@@ -21681,24 +21784,20 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
// ======= Inserting inlinee's basic blocks ===============
//
- BasicBlock* topBlock;
- BasicBlock* bottomBlock;
-
- topBlock = iciBlock;
-
- bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true);
- bottomBlock->bbRefs = 1;
+ bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true);
+ bottomBlock->bbRefs = 1;
bottomBlock->bbJumpDest = topBlock->bbJumpDest;
bottomBlock->inheritWeight(topBlock);
topBlock->bbJumpKind = BBJ_NONE;
// Update block flags
- unsigned originalFlags;
- originalFlags = topBlock->bbFlags;
- noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0);
- topBlock->bbFlags &= ~(BBF_SPLIT_LOST);
- bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED;
+ {
+ const unsigned __int64 originalFlags = topBlock->bbFlags;
+ noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0);
+ topBlock->bbFlags &= ~(BBF_SPLIT_LOST);
+ bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED;
+ }
//
// Split statements between topBlock and bottomBlock
@@ -21708,10 +21807,10 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
GenTreePtr bottomBlock_Begin;
GenTreePtr bottomBlock_End;
- topBlock_Begin = nullptr;
- topBlock_End = nullptr;
+ topBlock_Begin = nullptr;
+ topBlock_End = nullptr;
bottomBlock_Begin = nullptr;
- bottomBlock_End = nullptr;
+ bottomBlock_End = nullptr;
//
// First figure out bottomBlock_Begin
@@ -21724,7 +21823,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
// topBlock is empty before the split.
// In this case, both topBlock and bottomBlock should be empty
noway_assert(bottomBlock_Begin == nullptr);
- topBlock->bbTreeList = nullptr;
+ topBlock->bbTreeList = nullptr;
bottomBlock->bbTreeList = nullptr;
}
else if (topBlock->bbTreeList == bottomBlock_Begin)
@@ -21735,7 +21834,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
// And the split is before the first statement.
// In this case, topBlock should be empty, and everything else should be moved to the bottomBlock.
bottomBlock->bbTreeList = topBlock->bbTreeList;
- topBlock->bbTreeList = nullptr;
+ topBlock->bbTreeList = nullptr;
}
else if (bottomBlock_Begin == nullptr)
{
@@ -21753,9 +21852,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
noway_assert(bottomBlock_Begin);
// This is the normal case where both blocks should contain at least one statement.
- topBlock_Begin = topBlock->bbTreeList;
+ topBlock_Begin = topBlock->bbTreeList;
noway_assert(topBlock_Begin);
- topBlock_End = bottomBlock_Begin->gtPrev;
+ topBlock_End = bottomBlock_Begin->gtPrev;
noway_assert(topBlock_End);
bottomBlock_End = topBlock->lastStmt();
noway_assert(bottomBlock_End);
@@ -21778,25 +21877,23 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
bool inheritWeight;
inheritWeight = true; // The firstBB does inherit the weight from the iciBlock
- for (block = InlineeCompiler->fgFirstBB;
- block != nullptr;
- block = block->bbNext)
+ for (block = InlineeCompiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
noway_assert(!block->hasTryIndex());
noway_assert(!block->hasHndIndex());
block->copyEHRegion(iciBlock);
- block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP;
+ block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP;
- if (iciStmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ if (iciStmt->gtStmtILoffsx != BAD_IL_OFFSET)
{
- block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmt.gtStmtILoffsx);
- block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining?
+ block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmtILoffsx);
+ block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining?
}
else
{
- block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET?
- block->bbCodeOffsEnd = 0;
- block->bbFlags |= BBF_INTERNAL;
+ block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET?
+ block->bbCodeOffsEnd = 0;
+ block->bbFlags |= BBF_INTERNAL;
}
if (block->bbJumpKind == BBJ_RETURN)
@@ -21810,8 +21907,8 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
#ifdef DEBUG
if (verbose)
{
- printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n",
- block->bbNum, bottomBlock->bbNum);
+ printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n", block->bbNum,
+ bottomBlock->bbNum);
}
#endif // DEBUG
}
@@ -21846,6 +21943,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
//
fgBBcount += InlineeCompiler->fgBBcount;
+ // Append statements to unpin if necessary.
+ fgInlineAppendStatements(pInlineInfo, bottomBlock, nullptr);
+
#ifdef DEBUG
if (verbose)
{
@@ -21862,15 +21962,18 @@ _Done:
//
// Copy out some flags
//
- compLongUsed |= InlineeCompiler->compLongUsed;
- compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed;
- compLocallocUsed |= InlineeCompiler->compLocallocUsed;
- compQmarkUsed |= InlineeCompiler->compQmarkUsed;
- compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed;
+ compLongUsed |= InlineeCompiler->compLongUsed;
+ compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed;
+ compLocallocUsed |= InlineeCompiler->compLocallocUsed;
+ compQmarkUsed |= InlineeCompiler->compQmarkUsed;
+ compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed;
compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
- compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+ compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+
+ // Update unmanaged call count
+ info.compCallUnmanaged += InlineeCompiler->info.compCallUnmanaged;
- // Update optMethodFlags
+// Update optMethodFlags
#ifdef DEBUG
unsigned optMethodFlagsBefore = optMethodFlags;
@@ -21881,8 +21984,8 @@ _Done:
#ifdef DEBUG
if (optMethodFlags != optMethodFlagsBefore)
{
- JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n",
- optMethodFlagsBefore, InlineeCompiler->optMethodFlags, optMethodFlags);
+ JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n", optMethodFlagsBefore,
+ InlineeCompiler->optMethodFlags, optMethodFlags);
}
#endif
@@ -21908,24 +22011,41 @@ _Done:
// Detach the GT_CALL node from the original statement by hanging a "nothing" node under it,
// so that fgMorphStmts can remove the statement once we return from here.
//
- iciStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+ iciStmt->gtStmtExpr = gtNewNothingNode();
}
-// Prepend the statements that are needed before the inlined call.
-// Return the last statement that is prepended.
+//------------------------------------------------------------------------
+// fgInlinePrependStatements: prepend statements needed to match up
+// caller and inlined callee
+//
+// Arguments:
+// inlineInfo -- info for the inline
+//
+// Return Value:
+// The last statement that was added, or the original call if no
+// statements were added.
+//
+// Notes:
+// Statements prepended may include the following:
+// * This pointer null check
+// * Class initialization
+// * Zeroing of must-init locals in the callee
+// * Passing of call arguments via temps
+//
+// Newly added statements are placed just after the original call
+// and are given the same inline context as the call; any calls
+// added here will appear to have been part of the immediate caller.
-GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
+GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
{
- BasicBlock* block = inlineInfo->iciBlock;
+ BasicBlock* block = inlineInfo->iciBlock;
+ GenTreeStmt* callStmt = inlineInfo->iciStmt;
+ IL_OFFSETX callILOffset = callStmt->gtStmtILoffsx;
+ GenTreeStmt* postStmt = callStmt->gtNextStmt;
+ GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
+ GenTreePtr newStmt = nullptr;
+ GenTreePtr call = inlineInfo->iciCall;
- GenTreePtr callStmt = inlineInfo->iciStmt;
- noway_assert(callStmt->gtOper == GT_STMT);
- IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx;
-
- GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
- GenTreePtr newStmt;
-
- GenTreePtr call = inlineInfo->iciCall;
noway_assert(call->gtOper == GT_CALL);
#ifdef DEBUG
@@ -21939,12 +22059,13 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
// Prepend statements for any initialization / side effects
- InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo;
- InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
+ InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo;
+ InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
GenTreePtr tree;
- // Create the null check statement (but not appending it to the statement list yet) for the 'this' pointer if necessary.
+ // Create the null check statement (without appending it to the statement list yet) for the 'this' pointer, if
+ // necessary.
// The NULL check should be done after "argument setup statements".
// The only reason we move it here is for calling "impInlineFetchArg(0,..." to reserve a temp
// for the "this" pointer.
@@ -21956,8 +22077,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
if (call->gtFlags & GTF_CALL_NULLCHECK && !inlineInfo->thisDereferencedFirst)
{
// Call impInlineFetchArg to "reserve" a temp for the "this" pointer.
- nullcheck = gtNewOperNode(GT_IND, TYP_INT,
- impInlineFetchArg(0, inlArgInfo, lclVarInfo));
+ nullcheck = gtNewOperNode(GT_IND, TYP_INT, impInlineFetchArg(0, inlArgInfo, lclVarInfo));
nullcheck->gtFlags |= GTF_EXCEPT;
// The NULL-check statement will be inserted to the statement list after those statements
@@ -21995,10 +22115,8 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode;
- if (argSingleUseNode &&
- !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) &&
- !inlArgInfo[argNum].argHasLdargaOp &&
- !inlArgInfo[argNum].argHasStargOp)
+ if (argSingleUseNode && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) &&
+ !inlArgInfo[argNum].argHasLdargaOp && !inlArgInfo[argNum].argHasStargOp)
{
// Change the temp in-place to the actual argument.
// We currently do not support this for struct arguments, so it must not be a GT_OBJ.
@@ -22019,15 +22137,12 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
noway_assert(structHnd != NO_CLASS_HANDLE);
}
- // Unsafe value cls check is not needed for argTmpNum here since in-linee compiler instance would have
- // iterated over these and marked them accordingly.
- impAssignTempGen(inlArgInfo[argNum].argTmpNum,
- inlArgInfo[argNum].argNode,
- structHnd,
- (unsigned)CHECK_SPILL_NONE,
- & afterStmt,
- callILOffset,
- block);
+ // The unsafe value class check is not needed for
+ // argTmpNum here, since the inlinee compiler instance
+ // would have iterated over these args and marked them
+ // accordingly.
+ impAssignTempGen(inlArgInfo[argNum].argTmpNum, inlArgInfo[argNum].argNode, structHnd,
+ (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block);
#ifdef DEBUG
if (verbose)
@@ -22035,7 +22150,6 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
gtDispTree(afterStmt);
}
#endif // DEBUG
-
}
}
else if (inlArgInfo[argNum].argIsByRefToStructLocal)
@@ -22046,19 +22160,18 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
{
/* The argument is either not used or a const or lcl var */
- noway_assert(!inlArgInfo[argNum].argIsUsed ||
- inlArgInfo[argNum].argIsInvariant ||
- inlArgInfo[argNum].argIsLclVar );
+ noway_assert(!inlArgInfo[argNum].argIsUsed || inlArgInfo[argNum].argIsInvariant ||
+ inlArgInfo[argNum].argIsLclVar);
/* Make sure we didn't change argNodes along the way, or else
subsequent uses of the arg would have worked with the bashed value */
if (inlArgInfo[argNum].argIsInvariant)
{
- assert(inlArgInfo[argNum].argNode->OperIsConst() ||
- inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
+ assert(inlArgInfo[argNum].argNode->OperIsConst() || inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
}
noway_assert((inlArgInfo[argNum].argIsLclVar == 0) ==
- (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR || (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
+ (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR ||
+ (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
/* If the argument has side effects, append it */
@@ -22086,7 +22199,6 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
gtDispTree(afterStmt);
}
#endif // DEBUG
-
}
}
}
@@ -22101,7 +22213,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
if (inlineInfo->inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_USE_HELPER)
{
CORINFO_CONTEXT_HANDLE exactContext = inlineInfo->inlineCandidateInfo->exactContextHnd;
- CORINFO_CLASS_HANDLE exactClass;
+ CORINFO_CLASS_HANDLE exactClass;
if (((SIZE_T)exactContext & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS)
{
@@ -22109,18 +22221,19 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
}
else
{
- exactClass = info.compCompHnd->getMethodClass(CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK));
+ exactClass = info.compCompHnd->getMethodClass(
+ CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK));
}
- tree = fgGetSharedCCtor(exactClass);
- newStmt = gtNewStmt(tree, callILOffset);
+ tree = fgGetSharedCCtor(exactClass);
+ newStmt = gtNewStmt(tree, callILOffset);
afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
}
// Insert the nullcheck statement now.
if (nullcheck)
{
- newStmt = gtNewStmt(nullcheck, callILOffset);
+ newStmt = gtNewStmt(nullcheck, callILOffset);
afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
}
@@ -22133,8 +22246,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
// Does callee contain any zero-init local?
- if ((lclCnt != 0) &&
- (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0)
+ if ((lclCnt != 0) && (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0)
{
#ifdef DEBUG
@@ -22146,7 +22258,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++)
{
- unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+ unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
// Is the local used at all?
if (tmpNum != BAD_VAR_NUM)
@@ -22158,25 +22270,21 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
{
// The unsafe value class check is not needed here since the inlinee compiler instance would have
// iterated over the locals and marked them accordingly.
- impAssignTempGen(tmpNum,
- gtNewZeroConNode(genActualType(lclTyp)),
- NO_CLASS_HANDLE,
- (unsigned)CHECK_SPILL_NONE,
- & afterStmt,
- callILOffset,
- block);
+ impAssignTempGen(tmpNum, gtNewZeroConNode(genActualType(lclTyp)), NO_CLASS_HANDLE,
+ (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block);
}
else
{
- CORINFO_CLASS_HANDLE structType = lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle();
+ CORINFO_CLASS_HANDLE structType =
+ lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle();
- tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest
- gtNewIconNode(0), // Value
+ tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest
+ gtNewIconNode(0), // Value
info.compCompHnd->getClassSize(structType), // Size
- false, // isVolatile
- false); // not copyBlock
+ false, // isVolatile
+ false); // not copyBlock
- newStmt = gtNewStmt(tree, callILOffset);
+ newStmt = gtNewStmt(tree, callILOffset);
afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
}
@@ -22190,14 +22298,102 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
}
}
+ // Update any newly added statements with the appropriate context.
+ InlineContext* context = callStmt->gtInlineContext;
+ assert(context != nullptr);
+ for (GenTreeStmt* addedStmt = callStmt->gtNextStmt; addedStmt != postStmt; addedStmt = addedStmt->gtNextStmt)
+ {
+ assert(addedStmt->gtInlineContext == nullptr);
+ addedStmt->gtInlineContext = context;
+ }
+
return afterStmt;
}
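The prepend logic above relies on a simple insert-after-and-advance pattern: each new statement is linked in immediately after the running afterStmt cursor (either directly or through the &afterStmt out-parameter of impAssignTempGen), and the cursor then moves to the statement just inserted, so the additions appear in insertion order directly after the original call statement. The following is a minimal standalone sketch of that pattern only, using a hypothetical Stmt type rather than the JIT's GT_STMT nodes:

#include <cstdio>

// Hypothetical doubly-linked statement node; not the JIT's statement type.
struct Stmt
{
    int   id;
    Stmt* prev;
    Stmt* next;
};

// Link 'stmt' in right after 'after' and return it as the new cursor,
// mirroring the "afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt)" idiom.
Stmt* insertAfter(Stmt* after, Stmt* stmt)
{
    stmt->prev = after;
    stmt->next = after->next;
    if (after->next != nullptr)
    {
        after->next->prev = stmt;
    }
    after->next = stmt;
    return stmt;
}

int main()
{
    Stmt call   = {0, nullptr, nullptr}; // stands in for the original call statement
    Stmt setupA = {1, nullptr, nullptr}; // stands in for a prepended setup statement
    Stmt setupB = {2, nullptr, nullptr}; // stands in for another prepended statement

    Stmt* cursor = &call;
    cursor       = insertAfter(cursor, &setupA);
    cursor       = insertAfter(cursor, &setupB);

    // Prints "0 1 2": the additions follow the call in insertion order.
    for (Stmt* s = &call; s != nullptr; s = s->next)
    {
        printf("%d ", s->id);
    }
    printf("\n");
    return 0;
}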
+//------------------------------------------------------------------------
+// fgInlineAppendStatements: Append statements that are needed
+// after the inlined call.
+//
+// Arguments:
+// inlineInfo - information about the inline
+// block - basic block for the new statements
+// stmtAfter - (optional) insertion point for mid-block cases
+
+void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmtAfter)
+{
+ // Null out any inline pinned locals
+ if (!inlineInfo->hasPinnedLocals)
+ {
+ // No pins, nothing to do
+ return;
+ }
+
+ JITDUMP("Unpin inlinee locals:\n");
+
+ GenTreePtr callStmt = inlineInfo->iciStmt;
+ IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx;
+ CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo;
+ unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
+ InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
+
+ noway_assert(callStmt->gtOper == GT_STMT);
+
+ for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++)
+ {
+ unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+
+ // Is the local used at all?
+ if (tmpNum == BAD_VAR_NUM)
+ {
+ // Nope, nothing to unpin.
+ continue;
+ }
+
+ // Is the local pinned?
+ if (!lvaTable[tmpNum].lvPinned)
+ {
+ // Nope, nothing to unpin.
+ continue;
+ }
+
+ // Does the local we're about to unpin appear in the return
+ // expression? If so we somehow messed up and didn't properly
+ // spill the return value. See impInlineFetchLocal.
+ GenTreePtr retExpr = inlineInfo->retExpr;
+ if (retExpr != nullptr)
+ {
+ const bool interferesWithReturn = gtHasRef(inlineInfo->retExpr, tmpNum, false);
+ noway_assert(!interferesWithReturn);
+ }
+
+ // Emit the unpin, by assigning null to the local.
+ var_types lclTyp = (var_types)lvaTable[tmpNum].lvType;
+ noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo);
+ noway_assert(!varTypeIsStruct(lclTyp));
+ GenTreePtr unpinExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(genActualType(lclTyp)));
+ GenTreePtr unpinStmt = gtNewStmt(unpinExpr, callILOffset);
+
+ if (stmtAfter == nullptr)
+ {
+ stmtAfter = fgInsertStmtAtBeg(block, unpinStmt);
+ }
+ else
+ {
+ stmtAfter = fgInsertStmtAfter(block, stmtAfter, unpinStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(unpinStmt);
+ }
+#endif // DEBUG
+ }
+}
/*****************************************************************************/
/*static*/
-Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
@@ -22210,28 +22406,30 @@ Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree,
switch (tree->gtOper)
{
- case GT_MUL:
- case GT_ADD:
- case GT_SUB:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- case GT_CAST:
- if (tree->gtOverflow()) {
- return Compiler::WALK_ABORT;
-}
- break;
+ case GT_MUL:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_CAST:
+ if (tree->gtOverflow())
+ {
+ return Compiler::WALK_ABORT;
+ }
+ break;
- case GT_INDEX:
- if (tree->gtFlags & GTF_INX_RNGCHK) {
- return Compiler::WALK_ABORT;
-}
- break;
+ case GT_INDEX:
+ if (tree->gtFlags & GTF_INX_RNGCHK)
+ {
+ return Compiler::WALK_ABORT;
+ }
+ break;
- case GT_ARR_BOUNDS_CHECK:
- return Compiler::WALK_ABORT;
+ case GT_ARR_BOUNDS_CHECK:
+ return Compiler::WALK_ABORT;
- default:
- break;
+ default:
+ break;
}
return Compiler::WALK_CONTINUE;
@@ -22239,33 +22437,32 @@ Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree,
/*****************************************************************************/
/*static*/
-Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
- if (tree->gtOper == GT_LCLHEAP) {
+ if (tree->gtOper == GT_LCLHEAP)
+ {
return Compiler::WALK_ABORT;
-}
+ }
return Compiler::WALK_CONTINUE;
}
/*****************************************************************************/
/*static*/
-Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree,
- fgWalkData* data)
+Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
- if (tree->gtOper == GT_QMARK) {
+ if (tree->gtOper == GT_QMARK)
+ {
return Compiler::WALK_ABORT;
-}
+ }
return Compiler::WALK_CONTINUE;
}
-
void Compiler::fgLclFldAssign(unsigned lclNum)
{
assert(varTypeIsStruct(lvaTable[lclNum].lvType));
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index f20183b25a..128fc4addb 100644
--- a/src/jit/gcencode.cpp
+++ b/src/jit/gcencode.cpp
@@ -23,6 +23,89 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfotypes.h"
+ReturnKind GCTypeToReturnKind(CorInfoGCType gcType)
+{
+ switch (gcType)
+ {
+ case TYPE_GC_NONE:
+ return RT_Scalar;
+ case TYPE_GC_REF:
+ return RT_Object;
+ case TYPE_GC_BYREF:
+ return RT_ByRef;
+ default:
+ _ASSERTE(!"TYPE_GC_OTHER is unexpected");
+ return RT_Illegal;
+ }
+}
+
+ReturnKind GCInfo::getReturnKind()
+{
+ switch (compiler->info.compRetType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ return RT_Object;
+ case TYP_BYREF:
+ return RT_ByRef;
+ case TYP_STRUCT:
+ {
+ CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass;
+ var_types retType = compiler->getReturnTypeForStruct(structType);
+
+ switch (retType)
+ {
+ case TYP_ARRAY:
+ _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()");
+ // fall through
+ case TYP_REF:
+ return RT_Object;
+
+ case TYP_BYREF:
+ return RT_ByRef;
+
+ case TYP_STRUCT:
+ if (compiler->IsHfa(structType))
+ {
+#ifdef _TARGET_X86_
+ _ASSERTE(false && "HFAs not expected for X86");
+#endif // _TARGET_X86_
+
+ return RT_Scalar;
+ }
+ else
+ {
+ // Multi-reg return
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs);
+
+ ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]);
+ ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]);
+
+ return GetStructReturnKind(first, second);
+ }
+
+#ifdef _TARGET_X86_
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ return RT_Float;
+#endif // _TARGET_X86_
+ default:
+ return RT_Scalar;
+ }
+ }
+
+#ifdef _TARGET_X86_
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ return RT_Float;
+#endif // _TARGET_X86_
+
+ default:
+ return RT_Scalar;
+ }
+}
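For the multi-reg struct case above, the GC layout of the two return registers is classified slot by slot and then folded into a single return kind via GetStructReturnKind. The sketch below only illustrates the general idea of packing two per-slot classifications into one value; the enum and the packing rule are invented for illustration and do not match the real GC info encoding:

#include <cstdio>

// Illustrative per-slot classification, loosely mirroring CorInfoGCType.
enum SlotKind
{
    SLOT_SCALAR = 0,
    SLOT_OBJECT = 1,
    SLOT_BYREF  = 2
};

// Pack the two slot kinds into one value so both classifications can be
// recovered later; the actual encoding is defined by the GC info format.
int combineSlots(SlotKind first, SlotKind second)
{
    return (second << 2) | first;
}

int main()
{
    // e.g. a struct returned in two registers: { object reference, scalar }
    printf("combined kind = %d\n", combineSlots(SLOT_OBJECT, SLOT_SCALAR));
    return 0;
}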
+
#ifdef JIT32_GCENCODER
#include "emit.h"
@@ -104,18 +187,21 @@ static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state)
fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d,"
" %1d, %1d, %1d, %1d, %1d,"
- " %1d, %1d, %1d, %1d, %1d,"
- " %1d, %2d, %2d, %2d, %2d,"
- " %2d, %2d), \n",
+ " %1d, %1d, %1d, %1d, %1d, %1d,"
+ " %1d, %1d, %1d,"
+ " %1d, %2d, %2d,"
+ " %2d, %2d, %2d, %2d, %2d, %2d), \n",
state->prologSize, state->epilogSize, state->epilogCount, state->epilogAtEnd, state->ediSaved,
state->esiSaved, state->ebxSaved, state->ebpSaved, state->ebpFrame, state->interruptible,
state->doubleAlign, state->security, state->handlers, state->localloc, state->editNcontinue, state->varargs,
- state->profCallbacks, state->argCount, state->frameSize,
+ state->profCallbacks, state->genericsContext, state->genericsContextIsMethodDesc, state->returnKind,
+ state->argCount, state->frameSize,
(state->untrackedCnt <= SET_UNTRACKED_MAX) ? state->untrackedCnt : HAS_UNTRACKED,
(state->varPtrTableSize == 0) ? 0 : HAS_VARPTR,
(state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET) ? 0 : HAS_GS_COOKIE_OFFSET,
(state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET,
- (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET);
+ (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET,
+ (state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET) ? 0 : HAS_REV_PINVOKE_FRAME_OFFSET);
fflush(logFile);
@@ -265,9 +351,11 @@ static int bigEncoding4(unsigned cur, unsigned tgt, unsigned max)
return cnt;
}
-BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state)
+BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state, BYTE& codeSet)
{
BYTE encoding = 0xff;
+ codeSet = 1; // codeSet is 1 or 2, depending on whether the returned encoding
+ // corresponds to the InfoHdrAdjust or InfoHdrAdjust2 enumeration.
if (state->argCount != header.argCount)
{
@@ -547,6 +635,15 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state)
goto DO_RETURN;
}
+ if (GCInfoEncodesReturnKind() && (state->returnKind != header.returnKind))
+ {
+ state->returnKind = header.returnKind;
+ codeSet = 2; // Two byte encoding
+ encoding = header.returnKind;
+ _ASSERTE(encoding < SET_RET_KIND_MAX);
+ goto DO_RETURN;
+ }
+
if (state->gsCookieOffset != header.gsCookieOffset)
{
assert(state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET || state->gsCookieOffset == HAS_GS_COOKIE_OFFSET);
@@ -587,10 +684,31 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state)
}
}
+ if (GCInfoEncodesRevPInvokeFrame() && (state->revPInvokeOffset != header.revPInvokeOffset))
+ {
+ assert(state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET ||
+ state->revPInvokeOffset == HAS_REV_PINVOKE_FRAME_OFFSET);
+
+ if (state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET)
+ {
+ // header.revPInvokeOffset is non-zero.
+ state->revPInvokeOffset = HAS_REV_PINVOKE_FRAME_OFFSET;
+ encoding = FLIP_REV_PINVOKE_FRAME;
+ goto DO_RETURN;
+ }
+ else if (header.revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET)
+ {
+ state->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET;
+ encoding = FLIP_REV_PINVOKE_FRAME;
+ goto DO_RETURN;
+ }
+ }
+
DO_RETURN:
- assert(encoding < 0x80);
+ _ASSERTE(encoding < MORE_BYTES_TO_FOLLOW);
if (!state->isHeaderMatch(header))
- encoding |= 0x80;
+ encoding |= MORE_BYTES_TO_FOLLOW;
+
return encoding;
}
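encodeHeaderNext hands back one adjustment byte per call, setting MORE_BYTES_TO_FOLLOW on every byte until the running state matches the header, and the caller keeps requesting bytes while that bit is set (with codeSet == 2 adjustments additionally prefixed by a NEXT_OPCODE escape). The following is a minimal standalone sketch of the continuation-bit idea only; the constant and the codes are illustrative, not the JIT32 GC info values:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative constant only; the real values live in gcinfotypes.h.
constexpr uint8_t MORE_BYTES = 0x80;

// Emit a run of 7-bit adjustment codes; every byte except the last has the
// continuation bit set, mirroring "encoding |= MORE_BYTES_TO_FOLLOW".
std::vector<uint8_t> encodeAdjustments(const std::vector<uint8_t>& codes)
{
    std::vector<uint8_t> out;
    for (std::size_t i = 0; i < codes.size(); i++)
    {
        uint8_t b = codes[i] & 0x7f;
        if (i + 1 < codes.size())
        {
            b |= MORE_BYTES;
        }
        out.push_back(b);
    }
    return out;
}

int main()
{
    std::vector<uint8_t> stream = encodeAdjustments({0x12, 0x05, 0x3c});

    // The decoder keeps consuming bytes while the continuation bit is set.
    std::size_t i = 0;
    uint8_t     b;
    do
    {
        b = stream[i++];
        printf("adjustment 0x%02x\n", b & 0x7f);
    } while ((b & MORE_BYTES) != 0);

    return 0;
}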
@@ -806,6 +924,14 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo
return distance;
}
+ if (p->returnKind != header.returnKind)
+ {
+ // Setting the ReturnKind requires two bytes of encoding.
+ distance += 2;
+ if (distance >= closeness)
+ return distance;
+ }
+
if (header.gsCookieOffset != INVALID_GS_COOKIE_OFFSET)
{
distance += 1;
@@ -820,6 +946,13 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo
return distance;
}
+ if (header.revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
return distance;
}
@@ -1164,6 +1297,16 @@ size_t GCInfo::gcInfoBlockHdrSave(
header->genericsContext = compiler->lvaReportParamTypeArg();
header->genericsContextIsMethodDesc =
header->genericsContext && (compiler->info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC));
+
+ if (GCInfoEncodesReturnKind())
+ {
+ ReturnKind returnKind = getReturnKind();
+ _ASSERTE(IsValidReturnKind(returnKind) && "Return Kind must be valid");
+ _ASSERTE(!IsStructReturnKind(returnKind) && "Struct Return Kinds Unexpected for JIT32");
+ _ASSERTE(((int)returnKind < (int)SET_RET_KIND_MAX) && "ReturnKind has no legal encoding");
+ header->returnKind = returnKind;
+ }
+
header->gsCookieOffset = INVALID_GS_COOKIE_OFFSET;
if (compiler->getNeedsGSSecurityCookie())
{
@@ -1190,6 +1333,8 @@ size_t GCInfo::gcInfoBlockHdrSave(
assert(header->epilogCount <= 1);
}
+ header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET;
+
assert((compiler->compArgSize & 0x3) == 0);
size_t argCount =
@@ -1224,12 +1369,21 @@ size_t GCInfo::gcInfoBlockHdrSave(
*dest++ = headerEncoding;
BYTE encoding = headerEncoding;
- while (encoding & 0x80)
+ BYTE codeSet = 1;
+ while (encoding & MORE_BYTES_TO_FOLLOW)
{
- encoding = encodeHeaderNext(*header, &state);
+ encoding = encodeHeaderNext(*header, &state, codeSet);
+
#if REGEN_SHORTCUTS
regenLog(headerEncoding, header, &state);
#endif
+ _ASSERTE(((codeSet == 1) || (codeSet == 2)) && "Encoding must correspond to InfoHdrAdjust or InfoHdrAdjust2");
+ if (codeSet == 2)
+ {
+ *dest++ = NEXT_OPCODE | MORE_BYTES_TO_FOLLOW;
+ ++size;
+ }
+
*dest++ = encoding;
++size;
}
@@ -1771,12 +1925,12 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un
}
else
{
- /* Stack-passed arguments which are not enregistered
- * are always reported in this "untracked stack
- * pointers" section of the GC info even if lvTracked==true
- */
+/* Stack-passed arguments which are not enregistered
+ * are always reported in this "untracked stack
+ * pointers" section of the GC info even if lvTracked==true
+ */
- /* Has this argument been enregistered? */
+/* Has this argument been enregistered? */
#ifndef LEGACY_BACKEND
if (!varDsc->lvOnFrame)
#else // LEGACY_BACKEND
@@ -3277,7 +3431,7 @@ void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, uns
GCDump gcDump(GCINFO_VERSION);
gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
- gcDump.DumpPtrsInFrame((const BYTE*)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables);
+ gcDump.DumpPtrsInFrame((PTR_CBYTE)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables);
}
#endif // DUMP_GC_TABLES
@@ -3504,23 +3658,6 @@ public:
#endif // DEBUG
-ReturnKind GCTypeToReturnKind(CorInfoGCType gcType)
-{
-
- switch (gcType)
- {
- case TYPE_GC_NONE:
- return RT_Scalar;
- case TYPE_GC_REF:
- return RT_Object;
- case TYPE_GC_BYREF:
- return RT_ByRef;
- default:
- _ASSERTE(!"TYP_GC_OTHER is unexpected");
- return RT_Illegal;
- }
-}
-
void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize)
{
#ifdef DEBUG
@@ -3536,65 +3673,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
gcInfoEncoderWithLog->SetCodeLength(methodSize);
- ReturnKind returnKind = RT_Illegal;
-
- switch (compiler->info.compRetType)
- {
- case TYP_REF:
- case TYP_ARRAY:
- returnKind = RT_Object;
- break;
- case TYP_BYREF:
- returnKind = RT_ByRef;
- break;
- case TYP_STRUCT:
- {
- CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass;
- var_types retType = compiler->getReturnTypeForStruct(structType);
-
- switch (retType)
- {
- case TYP_ARRAY:
- _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()");
-
- case TYP_REF:
- returnKind = RT_Object;
- break;
-
- case TYP_BYREF:
- returnKind = RT_ByRef;
- break;
-
- case TYP_STRUCT:
- if (compiler->IsHfa(structType))
- {
- returnKind = RT_Scalar;
- }
- else
- {
- // Multi-reg return
- BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE };
- compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs);
-
- ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]);
- ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]);
-
- returnKind = GetStructReturnKind(first, second);
- }
- break;
-
- default:
- returnKind = RT_Scalar;
- break;
- }
- break;
- }
- default:
- returnKind = RT_Scalar;
- }
-
- _ASSERTE(returnKind != RT_Illegal);
- gcInfoEncoderWithLog->SetReturnKind(returnKind);
+ gcInfoEncoderWithLog->SetReturnKind(getReturnKind());
if (compiler->isFramePointerUsed())
{
@@ -3682,10 +3761,8 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
}
#if FEATURE_EH_FUNCLETS
- if (compiler->ehNeedsPSPSym())
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
- assert(compiler->lvaPSPSym != BAD_VAR_NUM);
-
#ifdef _TARGET_AMD64_
// The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms.
gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym));
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 67474e11ec..4a6cc740c6 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -21,7 +21,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
/*****************************************************************************/
const unsigned short GenTree::gtOperKindTable[] = {
-#define GTNODE(en, sn, cm, ok) ok + GTK_COMMUTE *cm,
+#define GTNODE(en, sn, st, cm, ok) ok + GTK_COMMUTE *cm,
#include "gtlist.h"
};
@@ -209,7 +209,7 @@ static void printIndent(IndentStack* indentStack)
}
static const char* nodeNames[] = {
-#define GTNODE(en, sn, cm, ok) sn,
+#define GTNODE(en, sn, st, cm, ok) sn,
#include "gtlist.h"
};
@@ -220,8 +220,12 @@ const char* GenTree::NodeName(genTreeOps op)
return nodeNames[op];
}
+#endif
+
+#if defined(DEBUG) || NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS
+
static const char* opNames[] = {
-#define GTNODE(en, sn, cm, ok) #en,
+#define GTNODE(en, sn, st, cm, ok) #en,
#include "gtlist.h"
};
@@ -234,12 +238,27 @@ const char* GenTree::OpName(genTreeOps op)
#endif
+#if MEASURE_NODE_SIZE && SMALL_TREE_NODES
+
+static const char* opStructNames[] = {
+#define GTNODE(en, sn, st, cm, ok) #st,
+#include "gtlist.h"
+};
+
+const char* GenTree::OpStructName(genTreeOps op)
+{
+ assert((unsigned)op < sizeof(opStructNames) / sizeof(opStructNames[0]));
+
+ return opStructNames[op];
+}
+
+#endif
+
/*****************************************************************************
*
* When 'SMALL_TREE_NODES' is enabled, we allocate tree nodes in 2 different
- * sizes: 'GTF_DEBUG_NODE_SMALL' for most nodes and 'GTF_DEBUG_NODE_LARGE' for
- * the few nodes (such as calls and statement list nodes) that have more fields
- * and take up a lot more space.
+ * sizes: 'TREE_NODE_SZ_SMALL' for most nodes and 'TREE_NODE_SZ_LARGE' for the
+ * few nodes (such as calls) that have more fields and take up a lot more space.
*/
#if SMALL_TREE_NODES
@@ -248,6 +267,19 @@ const char* GenTree::OpName(genTreeOps op)
/* static */
unsigned char GenTree::s_gtNodeSizes[GT_COUNT + 1];
+#if NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS
+
+unsigned char GenTree::s_gtTrueSizes[GT_COUNT + 1]{
+#define GTNODE(en, sn, st, cm, ok) sizeof(st),
+#include "gtlist.h"
+};
+
+#endif // NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS
+
+#if COUNT_AST_OPERS
+LONG GenTree::s_gtNodeCounts[GT_COUNT + 1] = {0};
+#endif // COUNT_AST_OPERS
+
/* static */
void GenTree::InitNodeSize()
{
@@ -265,12 +297,13 @@ void GenTree::InitNodeSize()
// Now set all of the appropriate entries to 'large'
CLANG_FORMAT_COMMENT_ANCHOR;
+// clang-format off
#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// On ARM32, ARM64 and System V for struct returning
// there is code that does GT_ASG-tree.CopyObj call.
// CopyObj is a large node and the GT_ASG is small, which triggers an exception.
- GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE;
#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE;
@@ -282,30 +315,32 @@ void GenTree::InitNodeSize()
#ifdef FEATURE_SIMD
GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE;
#endif // FEATURE_SIMD
- GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE;
#if USE_HELPERS_FOR_INT_DIV
- GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE;
- GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE;
#endif
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ // TODO-Throughput: This should not need to be a large node. The object info should be
+ // obtained from the child node.
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_PUT_STRUCT_ARG_STK
assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
@@ -314,60 +349,65 @@ void GenTree::InitNodeSize()
assert(sizeof(GenTreeLclFld) <= GenTree::s_gtNodeSizes[GT_LCL_FLD]);
assert(sizeof(GenTreeLclVar) <= GenTree::s_gtNodeSizes[GT_LCL_VAR]);
- static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeIntConCommon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL);
#ifndef LEGACY_BACKEND
- static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL);
#endif // !LEGACY_BACKEND
- static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node
- static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
- static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node
-#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
- static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
-#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
- static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeJumpCC) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeFieldList) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node
+#ifndef FEATURE_PUT_STRUCT_ARG_STK
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
+#else // FEATURE_PUT_STRUCT_ARG_STK
+ // TODO-Throughput: This should not need to be a large node. The object info should be
+ // obtained from the child node.
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
+#endif // FEATURE_PUT_STRUCT_ARG_STK
#ifdef FEATURE_SIMD
- static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
#endif // FEATURE_SIMD
+ // clang-format on
}
size_t GenTree::GetNodeSize() const
@@ -394,6 +434,88 @@ bool GenTree::IsNodeProperlySized() const
}
#endif
+/*****************************************************************************
+ *
+ * When 'NODEBASH_STATS' is enabled in "jit.h" we record all instances of
+ * an existing GenTree node having its operator changed. This can be useful
+ * for two (related) things - to see what is being bashed (and what isn't),
+ * and to verify that the existing choices for what nodes are marked 'large'
+ * are reasonable (to minimize "wasted" space).
+ *
+ * And yes, the hash function / logic is simplistic, but it is conflict-free
+ * and transparent for what we need.
+ */
+
+#if NODEBASH_STATS
+
+#define BASH_HASH_SIZE 211
+
+inline unsigned hashme(genTreeOps op1, genTreeOps op2)
+{
+ return ((op1 * 104729) ^ (op2 * 56569)) % BASH_HASH_SIZE;
+}
+
+struct BashHashDsc
+{
+ unsigned __int32 bhFullHash; // the hash value (unique for all old->new pairs)
+ unsigned __int32 bhCount; // the same old->new bashings seen so far
+ unsigned __int8 bhOperOld; // original gtOper
+ unsigned __int8 bhOperNew; // new gtOper
+};
+
+static BashHashDsc BashHash[BASH_HASH_SIZE];
+
+void GenTree::RecordOperBashing(genTreeOps operOld, genTreeOps operNew)
+{
+ unsigned hash = hashme(operOld, operNew);
+ BashHashDsc* desc = BashHash + hash;
+
+ if (desc->bhFullHash != hash)
+ {
+ noway_assert(desc->bhCount == 0); // if this ever fires, we need to fix the hash function
+ desc->bhFullHash = hash;
+ }
+
+ desc->bhCount += 1;
+ desc->bhOperOld = operOld;
+ desc->bhOperNew = operNew;
+}
+
+void GenTree::ReportOperBashing(FILE* f)
+{
+ unsigned total = 0;
+
+ fflush(f);
+
+ fprintf(f, "\n");
+ fprintf(f, "Bashed gtOper stats:\n");
+ fprintf(f, "\n");
+ fprintf(f, " Old operator New operator #bytes old->new Count\n");
+ fprintf(f, " ---------------------------------------------------------------\n");
+
+ for (unsigned h = 0; h < BASH_HASH_SIZE; h++)
+ {
+ unsigned count = BashHash[h].bhCount;
+ if (count == 0)
+ continue;
+
+ unsigned opOld = BashHash[h].bhOperOld;
+ unsigned opNew = BashHash[h].bhOperNew;
+
+ fprintf(f, " GT_%-13s -> GT_%-13s [size: %3u->%3u] %c %7u\n", OpName((genTreeOps)opOld),
+ OpName((genTreeOps)opNew), s_gtTrueSizes[opOld], s_gtTrueSizes[opNew],
+ (s_gtTrueSizes[opOld] < s_gtTrueSizes[opNew]) ? 'X' : ' ', count);
+ total += count;
+ }
+ fprintf(f, "\n");
+ fprintf(f, "Total bashings: %u\n", total);
+ fprintf(f, "\n");
+
+ fflush(f);
+}
+
+#endif // NODEBASH_STATS
+
#else // SMALL_TREE_NODES
#ifdef DEBUG
@@ -407,6 +529,71 @@ bool GenTree::IsNodeProperlySized() const
/*****************************************************************************/
+#if MEASURE_NODE_SIZE
+
+void GenTree::DumpNodeSizes(FILE* fp)
+{
+// Dump the sizes of the various GenTree flavors
+
+#if SMALL_TREE_NODES
+ fprintf(fp, "Small tree node size = %3u bytes\n", TREE_NODE_SZ_SMALL);
+#endif
+ fprintf(fp, "Large tree node size = %3u bytes\n", TREE_NODE_SZ_LARGE);
+ fprintf(fp, "\n");
+
+#if SMALL_TREE_NODES
+
+ // Verify that node sizes are set kosherly and dump sizes
+ for (unsigned op = GT_NONE + 1; op < GT_COUNT; op++)
+ {
+ unsigned needSize = s_gtTrueSizes[op];
+ unsigned nodeSize = s_gtNodeSizes[op];
+
+ const char* structNm = OpStructName((genTreeOps)op);
+ const char* operName = OpName((genTreeOps)op);
+
+ bool repeated = false;
+
+ // Have we seen this struct flavor before?
+ for (unsigned mop = GT_NONE + 1; mop < op; mop++)
+ {
+ if (strcmp(structNm, OpStructName((genTreeOps)mop)) == 0)
+ {
+ repeated = true;
+ break;
+ }
+ }
+
+ // Don't repeat the same GenTree flavor unless we have an error
+ if (!repeated || needSize > nodeSize)
+ {
+ unsigned sizeChar = '?';
+
+ if (nodeSize == TREE_NODE_SZ_SMALL)
+ sizeChar = 'S';
+ else if (nodeSize == TREE_NODE_SZ_LARGE)
+ sizeChar = 'L';
+
+ fprintf(fp, "GT_%-16s ... %-19s = %3u bytes (%c)", operName, structNm, needSize, sizeChar);
+ if (needSize > nodeSize)
+ {
+ fprintf(fp, " -- ERROR -- allocation is only %u bytes!", nodeSize);
+ }
+ else if (needSize <= TREE_NODE_SZ_SMALL && nodeSize == TREE_NODE_SZ_LARGE)
+ {
+ fprintf(fp, " ... could be small");
+ }
+
+ fprintf(fp, "\n");
+ }
+ }
+
+#endif
+}
+
+#endif // MEASURE_NODE_SIZE
+/*****************************************************************************/
+
// make sure these get instantiated, because it's not in a header file
// (emulating the c++ 'export' keyword here)
// VC appears to be somewhat unpredictable about whether they end up in the .obj file without this
@@ -965,11 +1152,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData
}
break;
- case GT_LIST:
+ case GT_FIELD_LIST:
{
- GenTreeArgList* list = tree->AsArgList();
- if (list->IsAggregate())
+ GenTreeFieldList* list = tree->AsFieldList();
+ if (list->IsFieldListHead())
{
+ GenTreeFieldList* list = tree->AsFieldList();
for (; list != nullptr; list = list->Rest())
{
result = fgWalkTreePostRec<computeStack>(&list->gtOp1, fgWalkData);
@@ -978,12 +1166,8 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData
return result;
}
}
- break;
}
-
- // GT_LIST nodes that do not represent aggregate arguments intentionally fall through to the
- // default node processing below.
- __fallthrough;
+ break;
}
default:
@@ -1765,6 +1949,66 @@ bool GenTreeCall::IsHelperCall(Compiler* compiler, unsigned helper) const
return IsHelperCall(compiler->eeFindHelper(helper));
}
+//------------------------------------------------------------------------
+// GenTreeCall::ReplaceCallOperand:
+// Replaces a given operand to a call node and updates the call
+// argument table if necessary.
+//
+// Arguments:
+// useEdge - the use edge that points to the operand to be replaced.
+// replacement - the replacement node.
+//
+void GenTreeCall::ReplaceCallOperand(GenTree** useEdge, GenTree* replacement)
+{
+ assert(useEdge != nullptr);
+ assert(replacement != nullptr);
+ assert(TryGetUse(*useEdge, &useEdge));
+
+ GenTree* originalOperand = *useEdge;
+ *useEdge = replacement;
+
+ const bool isArgument =
+ (replacement != gtControlExpr) &&
+ ((gtCallType != CT_INDIRECT) || ((replacement != gtCallCookie) && (replacement != gtCallAddr)));
+
+ if (isArgument)
+ {
+ if ((originalOperand->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ replacement->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ assert((replacement->gtFlags & GTF_LATE_ARG) == 0);
+
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(this, originalOperand);
+ assert(fp->node == originalOperand);
+ fp->node = replacement;
+ }
+ }
+}
+
+//-------------------------------------------------------------------------
+// AreArgsComplete: Determine if this GT_CALL node's arguments have been processed.
+//
+// Return Value:
+// Returns true if fgMorphArgs has processed the arguments.
+//
+bool GenTreeCall::AreArgsComplete() const
+{
+ if (fgArgInfo == nullptr)
+ {
+ return false;
+ }
+ if (fgArgInfo->AreArgsComplete())
+ {
+ assert((gtCallLateArgs != nullptr) || !fgArgInfo->HasRegArgs());
+ return true;
+ }
+ assert(gtCallArgs == nullptr);
+ return false;
+}
+
/*****************************************************************************
*
* Returns non-zero if the two trees are identical.
@@ -2071,7 +2315,9 @@ AGAIN:
#ifdef FEATURE_READYTORUN_COMPILER
if (op1->gtCall.gtEntryPoint.addr != op2->gtCall.gtEntryPoint.addr)
+ {
return false;
+ }
#endif
}
else
@@ -2560,8 +2806,8 @@ AGAIN:
hash = genTreeHashAdd(hash, tree->gtAllocObj.gtNewHelper);
break;
case GT_OBJ:
- hash = genTreeHashAdd(hash, static_cast<unsigned>(
- reinterpret_cast<uintptr_t>(tree->gtObj.gtClass)));
+ hash =
+ genTreeHashAdd(hash, static_cast<unsigned>(reinterpret_cast<uintptr_t>(tree->gtObj.gtClass)));
break;
// For the ones below no extra argument matters for comparison.
@@ -3196,6 +3442,11 @@ GenTreePtr Compiler::gtReverseCond(GenTree* tree)
tree->gtFlags ^= GTF_RELOP_NAN_UN;
}
}
+ else if (tree->OperGet() == GT_JCC)
+ {
+ GenTreeJumpCC* jcc = tree->AsJumpCC();
+ jcc->gtCondition = GenTree::ReverseRelop(jcc->gtCondition);
+ }
else
{
tree = gtNewOperNode(GT_NOT, TYP_INT, tree);
@@ -3257,77 +3508,136 @@ bool GenTree::gtIsValid64RsltMul()
#endif // DEBUG
-/*****************************************************************************
- *
- * Figure out the evaluation order for a list of values.
- */
+//------------------------------------------------------------------------------
+// gtSetListOrder : Figure out the evaluation order for a list of values.
+//
+//
+// list - List to figure out the evaluation order for
+// isListCallArgs - True iff the list is a list of call arguments
+// callArgsInRegs - True iff the list is a list of call arguments and they are passed in registers
+//
+// Return Value:
+//    The evaluation order level computed for the list.
-unsigned Compiler::gtSetListOrder(GenTree* list, bool regs)
+unsigned Compiler::gtSetListOrder(GenTree* list, bool isListCallArgs, bool callArgsInRegs)
{
- assert(list && list->IsList());
+ assert((list != nullptr) && list->OperIsAnyList());
+ assert(!callArgsInRegs || isListCallArgs);
- unsigned level = 0;
- unsigned ftreg = 0;
- unsigned costSz = 0;
- unsigned costEx = 0;
+ ArrayStack<GenTree*> listNodes(this);
+ do
+ {
+ listNodes.Push(list);
+ list = list->gtOp.gtOp2;
+ } while ((list != nullptr) && (list->OperIsAnyList()));
+
+ unsigned nxtlvl = (list == nullptr) ? 0 : gtSetEvalOrder(list);
+ while (listNodes.Height() > 0)
+ {
#if FEATURE_STACK_FP_X87
- /* Save the current FP stack level since an argument list
- * will implicitly pop the FP stack when pushing the argument */
- unsigned FPlvlSave = codeGen->genGetFPstkLevel();
+ /* Save the current FP stack level since an argument list
+ * will implicitly pop the FP stack when pushing the argument */
+ unsigned FPlvlSave = codeGen->genGetFPstkLevel();
#endif // FEATURE_STACK_FP_X87
- GenTreePtr next = list->gtOp.gtOp2;
+ list = listNodes.Pop();
+ assert(list && list->OperIsAnyList());
+ GenTreePtr next = list->gtOp.gtOp2;
- if (next)
- {
- unsigned nxtlvl = gtSetListOrder(next, regs);
+ unsigned level = 0;
+ unsigned ftreg = 0;
- ftreg |= next->gtRsvdRegs;
+ // TODO: Do we have to compute costs differently for argument lists and
+ // all other lists?
+ // https://github.com/dotnet/coreclr/issues/7095
+ unsigned costSz = (isListCallArgs || (next == nullptr)) ? 0 : 1;
+ unsigned costEx = (isListCallArgs || (next == nullptr)) ? 0 : 1;
- if (level < nxtlvl)
+ if (next != nullptr)
{
- level = nxtlvl;
+ ftreg |= next->gtRsvdRegs;
+ if (isListCallArgs)
+ {
+ if (level < nxtlvl)
+ {
+ level = nxtlvl;
+ }
+ }
+ costEx += next->gtCostEx;
+ costSz += next->gtCostSz;
}
- costEx += next->gtCostEx;
- costSz += next->gtCostSz;
- }
- GenTreePtr op1 = list->gtOp.gtOp1;
- unsigned lvl = gtSetEvalOrder(op1);
+ GenTreePtr op1 = list->gtOp.gtOp1;
+ unsigned lvl = gtSetEvalOrder(op1);
#if FEATURE_STACK_FP_X87
- /* restore the FP level */
- codeGen->genResetFPstkLevel(FPlvlSave);
+ // restore the FP level
+ codeGen->genResetFPstkLevel(FPlvlSave);
#endif // FEATURE_STACK_FP_X87
- list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs);
+ list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs);
- if (level < lvl)
- {
- level = lvl;
- }
+ // Swap the level counts
+ if (list->gtFlags & GTF_REVERSE_OPS)
+ {
+ unsigned tmpl;
- if (op1->gtCostEx != 0)
- {
- costEx += op1->gtCostEx;
- costEx += regs ? 0 : IND_COST_EX;
- }
+ tmpl = lvl;
+ lvl = nxtlvl;
+ nxtlvl = tmpl;
+ }
- if (op1->gtCostSz != 0)
- {
- costSz += op1->gtCostSz;
+ // TODO: Do we have to compute levels differently for argument lists and
+ // all other lists?
+ // https://github.com/dotnet/coreclr/issues/7095
+ if (isListCallArgs)
+ {
+ if (level < lvl)
+ {
+ level = lvl;
+ }
+ }
+ else
+ {
+ if (lvl < 1)
+ {
+ level = nxtlvl;
+ }
+ else if (lvl == nxtlvl)
+ {
+ level = lvl + 1;
+ }
+ else
+ {
+ level = lvl;
+ }
+ }
+
+ if (op1->gtCostEx != 0)
+ {
+ costEx += op1->gtCostEx;
+ costEx += (callArgsInRegs || !isListCallArgs) ? 0 : IND_COST_EX;
+ }
+
+ if (op1->gtCostSz != 0)
+ {
+ costSz += op1->gtCostSz;
#ifdef _TARGET_XARCH_
- if (regs) // push is smaller than mov to reg
+ if (callArgsInRegs) // push is smaller than mov to reg
#endif
- {
- costSz += 1;
+ {
+ costSz += 1;
+ }
}
- }
- list->SetCosts(costEx, costSz);
+ list->SetCosts(costEx, costSz);
- return level;
+ nxtlvl = level;
+ }
+
+ return nxtlvl;
}
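// The rewritten gtSetListOrder above replaces recursion over the list's op2 ("rest") chain
// with an explicit stack, so very long argument lists cannot overflow the native stack, and
// for non-argument lists it folds the per-element levels together in a Sethi-Ullman-like way.
// The standalone program below is an illustrative model of only that traversal and level
// folding; the node type, field names, and precomputed levels are simplified stand-ins
// (costs, flags, and the call-argument path are omitted).

#include <cstdio>
#include <vector>

struct ListNode
{
    unsigned  level; // stands in for gtSetEvalOrder(gtOp.gtOp1) of this element
    ListNode* rest;  // stands in for gtOp.gtOp2, the tail of the list
};

unsigned SetListOrder(ListNode* list)
{
    // Push every list node, then pop and fold levels from the tail back toward the head,
    // mirroring the loop in gtSetListOrder above.
    std::vector<ListNode*> nodes;
    for (ListNode* cur = list; cur != nullptr; cur = cur->rest)
    {
        nodes.push_back(cur);
    }

    unsigned nxtlvl = 0; // level of the (empty) tail
    while (!nodes.empty())
    {
        ListNode* cur = nodes.back();
        nodes.pop_back();

        unsigned lvl = cur->level;
        unsigned level;
        if (lvl < 1)
        {
            level = nxtlvl;
        }
        else if (lvl == nxtlvl)
        {
            level = lvl + 1; // equal subtree heights need one extra "register"
        }
        else
        {
            level = lvl;
        }
        nxtlvl = level;
    }
    return nxtlvl;
}

int main()
{
    ListNode c = {2, nullptr};
    ListNode b = {3, &c};
    ListNode a = {3, &b};
    printf("list level = %u\n", SetListOrder(&a)); // prints 4 for element levels {3, 3, 2}
    return 0;
}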
/*****************************************************************************
@@ -3363,17 +3673,8 @@ void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool con
{
GenTreePtr op1 = *op1WB;
GenTreePtr op2 = *op2WB;
- GenTreePtr op1EffectiveVal;
- if (op1->gtOper == GT_COMMA)
- {
- op1EffectiveVal = op1->gtEffectiveVal();
- if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
- (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
- {
- op1 = op1EffectiveVal;
- }
- }
+ op1 = op1->gtEffectiveVal();
// Now we look for op1's with non-overflow GT_ADDs [of constants]
while ((op1->gtOper == GT_ADD) && (!op1->gtOverflow()) && (!constOnly || (op1->gtOp.gtOp2->IsCnsIntOrI())))
@@ -3398,20 +3699,12 @@ void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool con
op2 = tmp;
}
- if (op1->gtOper == GT_COMMA)
- {
- op1EffectiveVal = op1->gtEffectiveVal();
- if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
- (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
- {
- op1 = op1EffectiveVal;
- }
- }
-
if (!constOnly && ((op2 == adr) || (!op2->IsCnsIntOrI())))
{
break;
}
+
+ op1 = op1->gtEffectiveVal();
}
*op1WB = op1;
@@ -3445,15 +3738,7 @@ GenTreePtr Compiler::gtWalkOpEffectiveVal(GenTreePtr op)
{
for (;;)
{
- if (op->gtOper == GT_COMMA)
- {
- GenTreePtr opEffectiveVal = op->gtEffectiveVal();
- if ((opEffectiveVal->gtOper == GT_ADD) && (!opEffectiveVal->gtOverflow()) &&
- (opEffectiveVal->gtOp.gtOp2->IsCnsIntOrI()))
- {
- op = opEffectiveVal;
- }
- }
+ op = op->gtEffectiveVal();
if ((op->gtOper != GT_ADD) || op->gtOverflow() || !op->gtOp.gtOp2->IsCnsIntOrI())
{
@@ -3980,6 +4265,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_NOP:
costEx = 0;
costSz = 0;
@@ -4671,6 +4957,14 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
goto DONE;
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ {
+ const bool isListCallArgs = false;
+ const bool callArgsInRegs = false;
+ return gtSetListOrder(tree, isListCallArgs, callArgsInRegs);
+ }
+
default:
break;
}
@@ -5025,6 +5319,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
break;
case GT_SUB:
@@ -5123,7 +5418,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
#if FEATURE_STACK_FP_X87
FPlvlSave = codeGen->genGetFPstkLevel();
#endif // FEATURE_STACK_FP_X87
- lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, false);
+ const bool isListCallArgs = true;
+ const bool callArgsInRegs = false;
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, isListCallArgs, callArgsInRegs);
if (level < lvl2)
{
level = lvl2;
@@ -5145,7 +5442,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
#if FEATURE_STACK_FP_X87
FPlvlSave = codeGen->genGetFPstkLevel();
#endif // FEATURE_STACK_FP_X87
- lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, true);
+ const bool isListCallArgs = true;
+ const bool callArgsInRegs = true;
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, isListCallArgs, callArgsInRegs);
if (level < lvl2)
{
level = lvl2;
@@ -5189,7 +5488,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costSz += 2;
}
}
- else if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
{
costEx += 2;
costSz += 6;
@@ -5789,11 +6088,11 @@ bool GenTree::IsAddWithI32Const(GenTreePtr* addr, int* offset)
// 'parent' must be non-null
//
// Notes:
-// When FEATURE_MULTIREG_ARGS is defined we can get here with GT_LDOBJ tree.
+// When FEATURE_MULTIREG_ARGS is defined we can get here with GT_OBJ tree.
// This happens when we have a struct that is passed in multiple registers.
//
// Also note that when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined the GT_LDOBJ
-// later gets converted to a GT_LIST with two GT_LCL_FLDs in Lower/LowerXArch.
+// later gets converted to a GT_FIELD_LIST with two GT_LCL_FLDs in Lower/LowerXArch.
//
GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
@@ -5952,6 +6251,9 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
bool GenTree::TryGetUse(GenTree* def, GenTree*** use)
{
+ assert(def != nullptr);
+ assert(use != nullptr);
+
for (GenTree** useEdge : UseEdges())
{
if (*useEdge == def)
@@ -5965,6 +6267,32 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use)
}
//------------------------------------------------------------------------
+// GenTree::ReplaceOperand:
+// Replace a given operand of this node with a new operand. If the
+// current node is a call node, this will also update the call
+// argument table if necessary.
+//
+// Arguments:
+// useEdge - the use edge that points to the operand to be replaced.
+// replacement - the replacement node.
+//
+void GenTree::ReplaceOperand(GenTree** useEdge, GenTree* replacement)
+{
+ assert(useEdge != nullptr);
+ assert(replacement != nullptr);
+ assert(TryGetUse(*useEdge, &useEdge));
+
+ if (OperGet() == GT_CALL)
+ {
+ AsCall()->ReplaceCallOperand(useEdge, replacement);
+ }
+ else
+ {
+ *useEdge = replacement;
+ }
+}
+
+//------------------------------------------------------------------------
// gtGetParent: Get the parent of this node, and optionally capture the
// pointer to the child so that it can be modified.
//
@@ -6500,16 +6828,15 @@ GenTreeCall* Compiler::gtNewCallNode(
#endif // LEGACY_BACKEND
#ifdef FEATURE_READYTORUN_COMPILER
- node->gtCall.gtEntryPoint.addr = nullptr;
+ node->gtEntryPoint.addr = nullptr;
#endif
#if defined(DEBUG) || defined(INLINE_DATA)
// These get updated after call node is built.
- node->gtCall.gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL;
- node->gtCall.gtRawILOffset = BAD_IL_OFFSET;
+ node->gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL;
+ node->gtRawILOffset = BAD_IL_OFFSET;
#endif
-#ifdef DEBUGGING_SUPPORT
// Spec: Managed Retval sequence points needs to be generated while generating debug info for debuggable code.
//
// Implementation note: if not generating MRV info genCallSite2ILOffsetMap will be NULL and
@@ -6537,7 +6864,6 @@ GenTreeCall* Compiler::gtNewCallNode(
assert(!genCallSite2ILOffsetMap->Lookup(node, &value));
genCallSite2ILOffsetMap->Set(node, ilOffset);
}
-#endif
// Initialize gtOtherRegs
node->ClearOtherRegs();
@@ -6545,6 +6871,22 @@ GenTreeCall* Compiler::gtNewCallNode(
// Initialize spill flags of gtOtherRegs
node->ClearOtherRegFlags();
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // Initialize the multi-reg long return info if necessary
+ if (varTypeIsLong(node))
+ {
+ // The return type will remain as the incoming long type
+ node->gtReturnType = node->gtType;
+
+ // Initialize Return type descriptor of call node
+ ReturnTypeDesc* retTypeDesc = node->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(this);
+
+ // must be a long returned in two registers
+ assert(retTypeDesc->GetReturnRegCount() == 2);
+ }
+#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
return node;
}
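// For reference, the multi-reg long return handled above corresponds to the standard 32-bit
// x86 convention where a 64-bit integer result comes back split across a register pair (low
// half in EAX, high half in EDX), which is why the ReturnTypeDesc reports two return
// registers. The standalone snippet below only demonstrates that lo/hi split; it uses no
// JIT types and the value is arbitrary.

#include <cstdint>
#include <cstdio>

int main()
{
    uint64_t callResult = 0x1122334455667788ULL;           // pretend a call returned this
    uint32_t lo = static_cast<uint32_t>(callResult);       // the half that would be in EAX
    uint32_t hi = static_cast<uint32_t>(callResult >> 32); // the half that would be in EDX
    printf("lo=0x%08x hi=0x%08x\n", (unsigned)lo, (unsigned)hi); // lo=0x55667788 hi=0x11223344
    return 0;
}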
@@ -6648,29 +6990,6 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2)
return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2));
}
-//------------------------------------------------------------------------
-// Compiler::gtNewAggregate:
-// Creates a new aggregate argument node. These nodes are used to
-// represent arguments that are composed of multiple values (e.g.
-// the lclVars that represent the fields of a promoted struct).
-//
-// Note that aggregate arguments are currently represented by GT_LIST
-// nodes that are marked with the GTF_LIST_AGGREGATE flag. This
-// representation may be changed in the future to instead use its own
-// node type (e.g. GT_AGGREGATE).
-//
-// Arguments:
-// firstElement - The first element in the aggregate's list of values.
-//
-// Returns:
-// The newly-created aggregate node.
-GenTreeArgList* Compiler::gtNewAggregate(GenTree* firstElement)
-{
- GenTreeArgList* agg = gtNewArgList(firstElement);
- agg->gtFlags |= GTF_LIST_AGGREGATE;
- return agg;
-}
-
/*****************************************************************************
*
* Create a list out of the three values.
@@ -6741,7 +7060,7 @@ fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
#endif // PROTO_JIT
else if (curArgTabEntry->parent != nullptr)
{
- assert(curArgTabEntry->parent->IsList());
+ assert(curArgTabEntry->parent->OperIsList());
if (curArgTabEntry->parent->Current() == node)
{
return curArgTabEntry;
@@ -6956,17 +7275,32 @@ GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size)
{
// By default we treat this as an opaque struct type with known size.
var_types blkType = TYP_STRUCT;
-#if FEATURE_SIMD
if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
{
GenTree* val = addr->gtGetOp1();
- if (varTypeIsSIMD(val) && (genTypeSize(val->TypeGet()) == size))
+#if FEATURE_SIMD
+ if (varTypeIsSIMD(val))
{
- blkType = val->TypeGet();
- return addr->gtGetOp1();
+ if (genTypeSize(val->TypeGet()) == size)
+ {
+ blkType = val->TypeGet();
+ return addr->gtGetOp1();
+ }
}
- }
+ else
#endif // FEATURE_SIMD
+#ifndef LEGACY_BACKEND
+ if (val->TypeGet() == TYP_STRUCT)
+ {
+ GenTreeLclVarCommon* lcl = addr->gtGetOp1()->AsLclVarCommon();
+ LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
+ if ((varDsc->TypeGet() == TYP_STRUCT) && (varDsc->lvExactSize == size))
+ {
+ return addr->gtGetOp1();
+ }
+ }
+#endif // !LEGACY_BACKEND
+ }
return new (this, GT_BLK) GenTreeBlk(GT_BLK, blkType, addr, size);
}
@@ -6979,7 +7313,10 @@ GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size)
// if FEATURE_SIMD is enabled and the source has a SIMD type.
// isVolatile - Is this marked as volatile memory?
-GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr, GenTreePtr srcAddr, CORINFO_CLASS_HANDLE structHnd, bool isVolatile)
+GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr,
+ GenTreePtr srcAddr,
+ CORINFO_CLASS_HANDLE structHnd,
+ bool isVolatile)
{
GenTreePtr lhs = gtNewStructVal(structHnd, dstAddr);
GenTree* src = nullptr;
@@ -7046,10 +7383,10 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType)
}
#endif // _TARGET_64BIT_
- // Make the type used in the GT_IND node match for evaluation types.
+ // Make the type match for evaluation types.
gtType = asgType;
- // if we are using an GT_INITBLK on a GC type the value being assigned has to be zero (null).
+ // if we are initializing a GC type the value being assigned must be zero (null).
assert(!varTypeIsGC(asgType) || (cns == 0));
}
@@ -7057,7 +7394,7 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType)
}
}
-//
+//
//------------------------------------------------------------------------
// gtBlockOpInit: Initializes a BlkOp GenTree
//
@@ -7066,7 +7403,7 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType)
// dst - the target (destination) we want to either initialize or copy to.
// src - the init value for InitBlk or the source struct for CpBlk/CpObj.
// isVolatile - specifies whether this node is a volatile memory operation.
-//
+//
// Assumptions:
// 'result' is an assignment that is newly constructed.
// If 'dst' is TYP_STRUCT, then it must be a block node or lclVar.
@@ -7156,9 +7493,6 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr
result->gtFlags |= dst->gtFlags & GTF_ALL_EFFECT;
result->gtFlags |= result->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT;
- // TODO-1stClassStructs: This should be done only if the destination is non-local.
- result->gtFlags |= (GTF_GLOB_REF | GTF_ASG);
-
// REVERSE_OPS is necessary because the use must occur before the def
result->gtFlags |= GTF_REVERSE_OPS;
@@ -7229,12 +7563,20 @@ GenTree* Compiler::gtNewBlkOpNode(
srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1();
}
}
-
- GenTree* result = gtNewAssignNode(dst, srcOrFillVal);
- if (!isCopyBlock)
+ else
{
- result->gtFlags |= GTF_BLK_INIT;
+ // InitBlk
+ assert(varTypeIsIntegral(srcOrFillVal));
+ if (varTypeIsStruct(dst))
+ {
+ if (!srcOrFillVal->IsIntegralConst(0))
+ {
+ srcOrFillVal = gtNewOperNode(GT_INIT_VAL, TYP_INT, srcOrFillVal);
+ }
+ }
}
+
+ GenTree* result = gtNewAssignNode(dst, srcOrFillVal);
gtBlockOpInit(result, dst, srcOrFillVal, isVolatile);
return result;
}
@@ -7376,17 +7718,30 @@ GenTreePtr Compiler::gtClone(GenTree* tree, bool complexOK)
return copy;
}
-/*****************************************************************************
- *
- * Clones the given tree value and returns a copy of the given tree. Any
- * references to local variable varNum will be replaced with the integer
- * constant varVal.
- */
+//------------------------------------------------------------------------
+// gtCloneExpr: Create a copy of `tree`, adding flags `addFlags`, mapping
+// local `varNum` to int constant `varVal` if it appears at
+// the root, and mapping uses of local `deepVarNum` to constant
+// `deepVarVal` if they occur beyond the root.
+//
+// Arguments:
+// tree - GenTree to create a copy of
+// addFlags - GTF_* flags to add to the copied tree nodes
+// varNum - lclNum to replace at the root, or ~0 for no root replacement
+// varVal - If replacing at root, replace local `varNum` with IntCns `varVal`
+// deepVarNum - lclNum to replace uses of beyond the root, or ~0 for no replacement
+// deepVarVal - If replacing beyond root, replace `deepVarNum` with IntCns `deepVarVal`
+//
+// Return Value:
+// A copy of the given tree with the replacements and added flags specified.
+//
+// Notes:
+// Top-level callers should generally call the overload that doesn't have
+// the explicit `deepVarNum` and `deepVarVal` parameters; those are used in
+// recursive invocations to avoid replacing defs.
-GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
- unsigned addFlags,
- unsigned varNum, // = (unsigned)-1
- int varVal)
+GenTreePtr Compiler::gtCloneExpr(
+ GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal)
{
if (tree == nullptr)
{
@@ -7442,6 +7797,10 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
if (tree->gtLclVarCommon.gtLclNum == varNum)
{
copy = gtNewIconNode(varVal, tree->gtType);
+ if (tree->gtFlags & GTF_VAR_ARR_INDEX)
+ {
+ copy->LabelIndex(this);
+ }
}
else
{
@@ -7572,16 +7931,16 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
// The nodes below this are not bashed, so they can be allocated at their individual sizes.
case GT_LIST:
- // This is ridiculous, but would go away if we made a stronger distinction between argument lists, whose
- // second argument *must* be an arglist*, and the uses of LIST in copyblk and initblk.
- if (tree->gtOp.gtOp2 != nullptr && tree->gtOp.gtOp2->OperGet() == GT_LIST)
- {
- copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1, tree->gtOp.gtOp2->AsArgList());
- }
- else
- {
- copy = new (this, GT_LIST) GenTreeOp(GT_LIST, TYP_VOID, tree->gtOp.gtOp1, tree->gtOp.gtOp2);
- }
+ assert((tree->gtOp.gtOp2 == nullptr) || tree->gtOp.gtOp2->OperIsList());
+ copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1);
+ copy->gtOp.gtOp2 = tree->gtOp.gtOp2;
+ break;
+
+ case GT_FIELD_LIST:
+ copy = new (this, GT_FIELD_LIST) GenTreeFieldList(tree->gtOp.gtOp1, tree->AsFieldList()->gtFieldOffset,
+ tree->AsFieldList()->gtFieldType, nullptr);
+ copy->gtOp.gtOp2 = tree->gtOp.gtOp2;
+ copy->gtFlags = (copy->gtFlags & ~GTF_FIELD_LIST_HEAD) | (tree->gtFlags & GTF_FIELD_LIST_HEAD);
break;
case GT_INDEX:
@@ -7608,8 +7967,9 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
case GT_ARR_INDEX:
copy = new (this, GT_ARR_INDEX)
- GenTreeArrIndex(tree->TypeGet(), gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, varNum, varVal),
- gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, varNum, varVal),
+ GenTreeArrIndex(tree->TypeGet(),
+ gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, deepVarNum, deepVarVal),
tree->gtArrIndex.gtCurrDim, tree->gtArrIndex.gtArrRank,
tree->gtArrIndex.gtArrElemType);
break;
@@ -7708,12 +8068,20 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
if (tree->gtOp.gtOp1)
{
- copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, varNum, varVal);
+ if (tree->gtOper == GT_ASG)
+ {
+ // Don't replace varNum if it appears as the LHS of an assign.
+ copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, -1, 0, deepVarNum, deepVarVal);
+ }
+ else
+ {
+ copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, deepVarNum, deepVarVal);
+ }
}
if (tree->gtGetOp2())
{
- copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, varNum, varVal);
+ copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, deepVarNum, deepVarVal);
}
/* Flags */
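// To illustrate the new substitution policy in gtCloneExpr: (varNum, varVal) is consulted
// only for the node currently being cloned, children are cloned with (deepVarNum,
// deepVarVal), and the destination of an assignment is cloned with the root substitution
// disabled so the def itself is never turned into a constant. The toy model below mirrors
// just that policy; the node kinds, fields, and printer are invented for illustration and
// are not JIT types.

#include <cstdio>

enum Kind { Cns, Var, Add, Asg };

struct Node
{
    Kind  kind;
    int   value; // Cns: the constant value; Var: the variable number
    Node* op1;   // Asg: destination, Add: left operand
    Node* op2;   // Asg: source,      Add: right operand
};

Node* Clone(Node* n, int rootVar, int rootVal, int deepVar, int deepVal)
{
    if (n == nullptr)
    {
        return nullptr;
    }
    if ((n->kind == Var) && (n->value == rootVar))
    {
        return new Node{Cns, rootVal, nullptr, nullptr}; // replace this use with a constant
    }
    // Don't allow the destination of an assignment itself to be replaced (it's a def),
    // but keep the deep substitution active for everything nested inside it.
    int op1Root = (n->kind == Asg) ? -1 : deepVar;
    return new Node{n->kind, n->value, Clone(n->op1, op1Root, deepVal, deepVar, deepVal),
                    Clone(n->op2, deepVar, deepVal, deepVar, deepVal)};
}

void Print(const Node* n)
{
    switch (n->kind)
    {
        case Cns: printf("%d", n->value); break;
        case Var: printf("V%02d", n->value); break;
        case Add: printf("("); Print(n->op1); printf(" + "); Print(n->op2); printf(")"); break;
        case Asg: Print(n->op1); printf(" = "); Print(n->op2); break;
    }
}

int main()
{
    Node dst = {Var, 0, nullptr, nullptr};
    Node use = {Var, 0, nullptr, nullptr};
    Node c3  = {Cns, 3, nullptr, nullptr};
    Node sum = {Add, 0, &use, &c3};
    Node asg = {Asg, 0, &dst, &sum}; // V00 = (V00 + 3)

    Node* copy = Clone(&asg, 0, 5, 0, 5); // substitute V00 -> 5 (the copy leaks; fine for a demo)
    Print(copy);                          // prints "V00 = (5 + 3)": the use was replaced, the def was not
    printf("\n");
    return 0;
}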
@@ -7775,18 +8143,6 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
copy->CopyReg(tree);
}
- // We can call gtCloneExpr() before we have called fgMorph when we expand a GT_INDEX node in fgMorphArrayIndex()
- // The method gtFoldExpr() expects to be run after fgMorph so it will set the GTF_DEBUG_NODE_MORPHED
- // flag on nodes that it adds/modifies. Then when we call fgMorph we will assert.
- // We really only will need to fold when this method is used to replace references to
- // local variable with an integer.
- //
- if (varNum != (unsigned)-1)
- {
- /* Try to do some folding */
- copy = gtFoldExpr(copy);
- }
-
goto DONE;
}
@@ -7795,7 +8151,7 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
switch (oper)
{
case GT_STMT:
- copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, varNum, varVal);
+ copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, deepVarNum, deepVarVal);
copy = gtNewStmt(copy, tree->gtStmt.gtStmtILoffsx);
goto DONE;
@@ -7803,15 +8159,17 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
copy = new (this, GT_CALL) GenTreeCall(tree->TypeGet());
- copy->gtCall.gtCallObjp =
- tree->gtCall.gtCallObjp ? gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, varNum, varVal) : nullptr;
- copy->gtCall.gtCallArgs = tree->gtCall.gtCallArgs
- ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, varNum, varVal)->AsArgList()
+ copy->gtCall.gtCallObjp = tree->gtCall.gtCallObjp
+ ? gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, deepVarNum, deepVarVal)
: nullptr;
+ copy->gtCall.gtCallArgs =
+ tree->gtCall.gtCallArgs
+ ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, deepVarNum, deepVarVal)->AsArgList()
+ : nullptr;
copy->gtCall.gtCallMoreFlags = tree->gtCall.gtCallMoreFlags;
copy->gtCall.gtCallLateArgs =
tree->gtCall.gtCallLateArgs
- ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, varNum, varVal)->AsArgList()
+ ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, deepVarNum, deepVarVal)->AsArgList()
: nullptr;
#if !FEATURE_FIXED_OUT_ARGS
@@ -7832,11 +8190,12 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
/* Copy the union */
if (tree->gtCall.gtCallType == CT_INDIRECT)
{
- copy->gtCall.gtCallCookie = tree->gtCall.gtCallCookie
- ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, varNum, varVal)
- : nullptr;
- copy->gtCall.gtCallAddr =
- tree->gtCall.gtCallAddr ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, varNum, varVal) : nullptr;
+ copy->gtCall.gtCallCookie =
+ tree->gtCall.gtCallCookie ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, deepVarNum, deepVarVal)
+ : nullptr;
+ copy->gtCall.gtCallAddr = tree->gtCall.gtCallAddr
+ ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, deepVarNum, deepVarVal)
+ : nullptr;
}
else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
{
@@ -7883,8 +8242,9 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, nullptr, tree->gtField.gtFldOffset);
- copy->gtField.gtFldObj =
- tree->gtField.gtFldObj ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, varNum, varVal) : nullptr;
+ copy->gtField.gtFldObj = tree->gtField.gtFldObj
+ ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, deepVarNum, deepVarVal)
+ : nullptr;
copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap;
#ifdef FEATURE_READYTORUN_COMPILER
copy->gtField.gtFieldLookup = tree->gtField.gtFieldLookup;
@@ -7897,10 +8257,10 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
GenTreePtr inds[GT_ARR_MAX_RANK];
for (unsigned dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
{
- inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, varNum, varVal);
+ inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, deepVarNum, deepVarVal);
}
copy = new (this, GT_ARR_ELEM)
- GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, varNum, varVal),
+ GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, deepVarNum, deepVarVal),
tree->gtArrElem.gtArrRank, tree->gtArrElem.gtArrElemSize, tree->gtArrElem.gtArrElemType,
&inds[0]);
}
@@ -7909,34 +8269,37 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
case GT_ARR_OFFSET:
{
copy = new (this, GT_ARR_OFFSET)
- GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, varNum, varVal),
- gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, varNum, varVal),
- gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, varNum, varVal),
+ GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, deepVarNum, deepVarVal),
tree->gtArrOffs.gtCurrDim, tree->gtArrOffs.gtArrRank, tree->gtArrOffs.gtArrElemType);
}
break;
case GT_CMPXCHG:
copy = new (this, GT_CMPXCHG)
- GenTreeCmpXchg(tree->TypeGet(), gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, varNum, varVal),
- gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, varNum, varVal),
- gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, varNum, varVal));
+ GenTreeCmpXchg(tree->TypeGet(),
+ gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, deepVarNum, deepVarVal));
break;
case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- copy = new (this, oper) GenTreeBoundsChk(oper, tree->TypeGet(),
- gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, varNum, varVal),
- gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, varNum, varVal),
- tree->gtBoundsChk.gtThrowKind);
+ copy = new (this, oper)
+ GenTreeBoundsChk(oper, tree->TypeGet(),
+ gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, deepVarNum, deepVarVal),
+ tree->gtBoundsChk.gtThrowKind);
break;
case GT_STORE_DYN_BLK:
case GT_DYN_BLK:
- copy = new (this, oper) GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, varNum, varVal),
- gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, varNum, varVal));
+ copy = new (this, oper)
+ GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, deepVarNum, deepVarVal));
break;
default:
@@ -8050,12 +8413,31 @@ GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr
{
assert(treeParent != nullptr);
+ // Check to see if the node to be replaced is a call argument and if so,
+ // set `treeParent` to the call node.
+ GenTree* cursor = treeParent;
+ while ((cursor != nullptr) && (cursor->OperGet() == GT_LIST))
+ {
+ cursor = cursor->gtNext;
+ }
+
+ if ((cursor != nullptr) && (cursor->OperGet() == GT_CALL))
+ {
+ treeParent = cursor;
+ }
+
+#ifdef DEBUG
+ GenTree** useEdge;
+ assert(treeParent->TryGetUse(tree, &useEdge));
+ assert(useEdge == treePtr);
+#endif // DEBUG
+
GenTreePtr treeFirstNode = fgGetFirstNode(tree);
GenTreePtr treeLastNode = tree;
GenTreePtr treePrevNode = treeFirstNode->gtPrev;
GenTreePtr treeNextNode = treeLastNode->gtNext;
- *treePtr = replacementTree;
+ treeParent->ReplaceOperand(treePtr, replacementTree);
// Build the linear order for "replacementTree".
fgSetTreeSeq(replacementTree, treePrevNode);
@@ -8082,48 +8464,6 @@ GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr
treeNextNode->gtPrev = treeLastNode;
}
- bool needFixupCallArg = false;
- GenTreePtr node = treeParent;
-
- // If we have replaced an arg, then update pointers in argtable.
- do
- {
- // Look for the first enclosing callsite
- switch (node->OperGet())
- {
- case GT_LIST:
- case GT_ARGPLACE:
- // "tree" is likely an argument of a call.
- needFixupCallArg = true;
- break;
-
- case GT_CALL:
- if (needFixupCallArg)
- {
- // We have replaced an arg, so update pointers in argtable.
- fgFixupArgTabEntryPtr(node, tree, replacementTree);
- needFixupCallArg = false;
- }
- break;
-
- default:
- // "tree" is unlikely an argument of a call.
- needFixupCallArg = false;
- break;
- }
-
- if (needFixupCallArg)
- {
- // Keep tracking to update the first enclosing call.
- node = node->gtGetParent(nullptr);
- }
- else
- {
- // Stop tracking.
- node = nullptr;
- }
- } while (node != nullptr);
-
// Propagate side-effect flags of "replacementTree" to its parents if needed.
gtUpdateSideEffects(treeParent, tree->gtFlags, replacementTree->gtFlags);
}
@@ -8304,14 +8644,13 @@ bool GenTree::gtSetFlags() const
//
// Precondition we have a GTK_SMPOP
//
- assert(OperIsSimple());
-
if (!varTypeIsIntegralOrI(TypeGet()))
{
return false;
}
#if FEATURE_SET_FLAGS
+ assert(OperIsSimple());
if ((gtFlags & GTF_SET_FLAGS) && gtOper != GT_IND)
{
@@ -8325,6 +8664,7 @@ bool GenTree::gtSetFlags() const
#else // !FEATURE_SET_FLAGS
+#ifdef LEGACY_BACKEND
#ifdef _TARGET_XARCH_
// Return true if/when the codegen for this node will set the flags
//
@@ -8346,6 +8686,22 @@ bool GenTree::gtSetFlags() const
return false;
#endif
+#else // !LEGACY_BACKEND
+#ifdef _TARGET_XARCH_
+ if (((gtFlags & GTF_SET_FLAGS) != 0) && (gtOper != GT_IND))
+ {
+ // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+#else
+ unreached();
+#endif
+#endif // !LEGACY_BACKEND
+
#endif // !FEATURE_SET_FLAGS
}
@@ -8399,7 +8755,8 @@ bool GenTree::gtRequestSetFlags()
/*****************************************************************************/
void GenTree::CopyTo(class Compiler* comp, const GenTree& gt)
{
- gtOper = gt.gtOper;
+ SetOperRaw(gt.OperGet());
+
gtType = gt.gtType;
gtAssertionNum = gt.gtAssertionNum;
@@ -8772,19 +9129,12 @@ GenTreePtr GenTree::GetChild(unsigned childNum)
}
}
-GenTreeUseEdgeIterator::GenTreeUseEdgeIterator()
- : m_node(nullptr)
- , m_edge(nullptr)
- , m_argList(nullptr)
- , m_state(-1)
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator() : m_node(nullptr), m_edge(nullptr), m_argList(nullptr), m_state(-1)
{
}
GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
- : m_node(node)
- , m_edge(nullptr)
- , m_argList(nullptr)
- , m_state(0)
+ : m_node(node), m_edge(nullptr), m_argList(nullptr), m_state(0)
{
assert(m_node != nullptr);
@@ -8894,30 +9244,53 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
}
case GT_DYN_BLK:
+ {
+ GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
switch (m_state)
{
case 0:
- return &(m_node->AsDynBlk()->gtOp1);
+ return dynBlock->gtEvalSizeFirst ? &dynBlock->gtDynamicSize : &dynBlock->gtOp1;
case 1:
- return &(m_node->AsDynBlk()->gtDynamicSize);
+ return dynBlock->gtEvalSizeFirst ? &dynBlock->gtOp1 : &dynBlock->gtDynamicSize;
default:
return nullptr;
}
- break;
+ }
+ break;
case GT_STORE_DYN_BLK:
- switch (m_state)
+ {
+ GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
+ if (dynBlock->gtEvalSizeFirst)
{
- case 0:
- return &(m_node->AsDynBlk()->gtOp1);
- case 1:
- return &(m_node->AsDynBlk()->gtOp2);
- case 2:
- return &(m_node->AsDynBlk()->gtDynamicSize);
- default:
- return nullptr;
+ switch (m_state)
+ {
+ case 0:
+ return &dynBlock->gtDynamicSize;
+ case 1:
+ return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
+ case 2:
+ return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
+ default:
+ return nullptr;
+ }
}
- break;
+ else
+ {
+ switch (m_state)
+ {
+ case 0:
+ return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
+ case 1:
+ return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
+ case 2:
+ return &dynBlock->gtDynamicSize;
+ default:
+ return nullptr;
+ }
+ }
+ }
+ break;
case GT_LEA:
{
@@ -8942,13 +9315,9 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
}
break;
- case GT_LIST:
- if (m_node->AsArgList()->IsAggregate())
- {
- // List nodes that represent aggregates are handled by MoveNextAggregateUseEdge.
- break;
- }
- __fallthrough;
+ case GT_FIELD_LIST:
+ // Field List nodes are handled by MoveToNextFieldUseEdge.
+ break;
default:
if (m_node->OperIsConst() || m_node->OperIsLeaf())
@@ -8988,13 +9357,13 @@ void GenTreeUseEdgeIterator::MoveToNextCallUseEdge()
{
enum
{
- CALL_INSTANCE = 0,
- CALL_ARGS = 1,
- CALL_LATE_ARGS = 2,
+ CALL_INSTANCE = 0,
+ CALL_ARGS = 1,
+ CALL_LATE_ARGS = 2,
CALL_CONTROL_EXPR = 3,
- CALL_COOKIE = 4,
- CALL_ADDRESS = 5,
- CALL_TERMINAL = 6,
+ CALL_COOKIE = 4,
+ CALL_ADDRESS = 5,
+ CALL_TERMINAL = 6,
};
GenTreeCall* call = m_node->AsCall();
@@ -9197,10 +9566,9 @@ void GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge()
}
#endif // FEATURE_SIMD
-void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge()
+void GenTreeUseEdgeIterator::MoveToNextFieldUseEdge()
{
- assert(m_node->OperGet() == GT_LIST);
- assert(m_node->AsArgList()->IsAggregate());
+ assert(m_node->OperGet() == GT_FIELD_LIST);
for (;;)
{
@@ -9218,9 +9586,9 @@ void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge()
}
else
{
- GenTreeArgList* aggNode = m_argList->AsArgList();
- m_edge = &aggNode->gtOp1;
- m_argList = aggNode->Rest();
+ GenTreeArgList* listNode = m_argList->AsArgList();
+ m_edge = &listNode->gtOp1;
+ m_argList = listNode->Rest();
return;
}
break;
@@ -9266,9 +9634,9 @@ GenTreeUseEdgeIterator& GenTreeUseEdgeIterator::operator++()
MoveToNextSIMDUseEdge();
}
#endif
- else if ((op == GT_LIST) && (m_node->AsArgList()->IsAggregate()))
+ else if (op == GT_FIELD_LIST)
{
- MoveToNextAggregateUseEdge();
+ MoveToNextFieldUseEdge();
}
else
{
@@ -9529,7 +9897,7 @@ void Compiler::gtDispNodeName(GenTree* tree)
{
sprintf_s(bufp, sizeof(buf), " %s_ovfl%c", name, 0);
}
- else if (tree->OperIsBlk() && (tree->AsBlk()->gtBlkSize != 0))
+ else if (tree->OperIsBlk() && !tree->OperIsDynBlk())
{
sprintf_s(bufp, sizeof(buf), " %s(%d)", name, tree->AsBlk()->gtBlkSize);
}
@@ -9775,6 +10143,9 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
goto DASH;
case GT_MUL:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
if (tree->gtFlags & GTF_MUL_64RSLT)
{
printf("L");
@@ -10409,6 +10780,13 @@ void Compiler::gtDispConst(GenTree* tree)
printf(" field offset");
}
+#ifdef FEATURE_SIMD
+ if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
+ {
+ printf(" Vector<T>.Count");
+ }
+#endif
+
if ((tree->IsReuseRegVal()) != 0)
{
printf(" reuse reg val");
@@ -10714,6 +11092,10 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
}
break;
+ case GT_JCC:
+ printf(" cond=%s", GenTree::NodeName(tree->AsJumpCC()->gtCondition));
+ break;
+
default:
assert(!"don't know how to display tree leaf node");
}
@@ -10928,14 +11310,62 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
printf(" (last use)");
}
- if (tree->OperIsCopyBlkOp())
+ if (tree->OperIsBlkOp())
+ {
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+ if (tree->OperIsStoreBlk() && (tree->AsBlk()->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid))
+ {
+ switch (tree->AsBlk()->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindRepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreeBlk::BlkOpKindHelper:
+ printf(" (Helper)");
+ break;
+ default:
+ unreached();
+ }
+ }
+ }
+ else if (tree->OperIsFieldList())
{
- printf(" (copy)");
+ printf(" %s at offset %d", varTypeName(tree->AsFieldList()->gtFieldType),
+ tree->AsFieldList()->gtFieldOffset);
}
- else if (tree->OperIsInitBlkOp())
+#if FEATURE_PUT_STRUCT_ARG_STK
+ else if ((tree->OperGet() == GT_PUTARG_STK) &&
+ (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid))
{
- printf(" (init)");
+ switch (tree->AsPutArgStk()->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::RepInstr:
+ printf(" (RepInstr)");
+ break;
+ case GenTreePutArgStk::Kind::Unroll:
+ printf(" (Unroll)");
+ break;
+ case GenTreePutArgStk::Kind::Push:
+ printf(" (Push)");
+ break;
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ printf(" (PushAllSlots)");
+ break;
+ default:
+ unreached();
+ }
}
+#endif // FEATURE_PUT_STRUCT_ARG_STK
IndirectAssignmentAnnotation* pIndirAnnote;
if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
@@ -11282,7 +11712,7 @@ void Compiler::gtDispTree(GenTreePtr tree,
// call - The call for which 'arg' is an argument
// arg - The argument for which a message should be constructed
// argNum - The ordinal number of the arg in the argument list
-// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// listCount - When printing in LIR form this is the count for a GT_FIELD_LIST
// or -1 if we are not printing in LIR form
// bufp - A pointer to the buffer into which the message is written
// bufLength - The length of the buffer pointed to by bufp
@@ -11338,7 +11768,7 @@ void Compiler::gtGetArgMsg(
// call - The call for which 'arg' is an argument
// argx - The argument for which a message should be constructed
// lateArgIndex - The ordinal number of the arg in the lastArg list
-// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// listCount - When printing in LIR form this is the count for a multireg GT_FIELD_LIST
// or -1 if we are not printing in LIR form
// bufp - A pointer to the buffer into which the message is written
// bufLength - The length of the buffer pointed to by bufp
@@ -11542,22 +11972,8 @@ void Compiler::gtDispLIRNode(GenTree* node)
const bool nodeIsCall = node->IsCall();
- int numCallEarlyArgs = 0;
- if (nodeIsCall)
- {
- GenTreeCall* call = node->AsCall();
- for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
- {
- if (!args->Current()->IsArgPlaceHolderNode() && args->Current()->IsValue())
- {
- numCallEarlyArgs++;
- }
- }
- }
-
// Visit operands
- IndentInfo operandArc = IIArcTop;
- int callArgNumber = 0;
+ IndentInfo operandArc = IIArcTop;
for (GenTree* operand : node->Operands())
{
if (operand->IsArgPlaceHolderNode() || !operand->IsValue())
@@ -11588,20 +12004,22 @@ void Compiler::gtDispLIRNode(GenTree* node)
}
else
{
- int callLateArgNumber = callArgNumber - numCallEarlyArgs;
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(call, operand);
+ assert(curArgTabEntry);
+
if (operand->OperGet() == GT_LIST)
{
int listIndex = 0;
for (GenTreeArgList* element = operand->AsArgList(); element != nullptr; element = element->Rest())
{
operand = element->Current();
- if (callLateArgNumber < 0)
+ if (curArgTabEntry->lateArgInx == (unsigned)-1)
{
- gtGetArgMsg(call, operand, callArgNumber, listIndex, buf, sizeof(buf));
+ gtGetArgMsg(call, operand, curArgTabEntry->argNum, listIndex, buf, sizeof(buf));
}
else
{
- gtGetLateArgMsg(call, operand, callLateArgNumber, listIndex, buf, sizeof(buf));
+ gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, listIndex, buf, sizeof(buf));
}
displayOperand(operand, buf, operandArc, indentStack);
@@ -11610,19 +12028,17 @@ void Compiler::gtDispLIRNode(GenTree* node)
}
else
{
- if (callLateArgNumber < 0)
+ if (curArgTabEntry->lateArgInx == (unsigned)-1)
{
- gtGetArgMsg(call, operand, callArgNumber, -1, buf, sizeof(buf));
+ gtGetArgMsg(call, operand, curArgTabEntry->argNum, -1, buf, sizeof(buf));
}
else
{
- gtGetLateArgMsg(call, operand, callLateArgNumber, -1, buf, sizeof(buf));
+ gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, -1, buf, sizeof(buf));
}
displayOperand(operand, buf, operandArc, indentStack);
}
-
- callArgNumber++;
}
}
else if (node->OperIsDynBlkOp())
@@ -12315,9 +12731,6 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
case TYP_ULONG:
if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
{
- op1->ChangeOperConst(GT_CNS_NATIVELONG); // need type of oper to be same as tree
- op1->gtType = TYP_LONG;
- // We don't care about the value as we are throwing an exception
goto LNG_OVF;
}
lval1 = UINT64(UINT32(i1));
@@ -12516,47 +12929,19 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
// constants in a target-specific function.
CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_XARCH_
- // Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper
- // doesn't match with the C compiler's cast result.
+ // Don't fold conversions of +inf/-inf to an integral value on any platform, as the
+ // value returned by the JIT helper doesn't match the C compiler's cast result.
+ // We want the behavior to be the same with or without folding.
return tree;
-#else //!_TARGET_XARCH_
+ }
- switch (tree->CastToType())
- {
- case TYP_BYTE:
- i1 = ssize_t(INT8(d1));
- goto CNS_INT;
- case TYP_UBYTE:
- i1 = ssize_t(UINT8(d1));
- goto CNS_INT;
- case TYP_SHORT:
- i1 = ssize_t(INT16(d1));
- goto CNS_INT;
- case TYP_CHAR:
- i1 = ssize_t(UINT16(d1));
- goto CNS_INT;
- case TYP_INT:
- i1 = ssize_t(INT32(d1));
- goto CNS_INT;
- case TYP_UINT:
- i1 = ssize_t(UINT32(d1));
- goto CNS_INT;
- case TYP_LONG:
- lval1 = INT64(d1);
- goto CNS_LONG;
- case TYP_ULONG:
- lval1 = UINT64(d1);
- goto CNS_LONG;
- case TYP_FLOAT:
- case TYP_DOUBLE:
- if (op1->gtType == TYP_FLOAT)
- d1 = forceCastToFloat(d1); // it's only !_finite() after this conversion
- goto CNS_DOUBLE;
- default:
- unreached();
- }
-#endif //!_TARGET_XARCH_
+ if (d1 <= -1.0 && varTypeIsUnsigned(tree->CastToType()))
+ {
+ // Don't fold these conversions because the result is unspecified per the ECMA spec
+ // and the native math doing the fold doesn't match the run-time computation on all
+ // platforms.
+ // We want the behavior to be the same with or without folding.
+ return tree;
}
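// A concrete illustration of why the fold is skipped: for an out-of-range value such as
// -1.5 converted to an unsigned 32-bit integer, common targets disagree (ARM-style
// conversions saturate to the destination range, while typical x86 sequences convert
// through a signed integer and wrap), so folding with the host's arithmetic could change
// observable behavior. The snippet below computes both candidate answers using only
// well-defined operations; it is illustrative and uses no JIT types.

#include <cstdint>
#include <cstdio>

int main()
{
    double d = -1.5;

    // Saturating behavior (as on ARM): out-of-range values clamp, so -1.5 -> 0.
    uint32_t saturated = (d < 0.0) ? 0u : static_cast<uint32_t>(d);

    // Wrap-through-signed behavior (as commonly generated on x86): -1.5 -> -1 -> 0xFFFFFFFF.
    uint32_t wrapped = static_cast<uint32_t>(static_cast<int64_t>(d));

    printf("saturated=0x%08x wrapped=0x%08x\n", (unsigned)saturated, (unsigned)wrapped);
    return 0;
}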
switch (tree->CastToType())
@@ -12633,7 +13018,7 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
return op2;
}
- if (tree->gtOper == GT_LIST)
+ if (tree->OperIsAnyList())
{
return tree;
}
@@ -13621,8 +14006,8 @@ GenTreePtr Compiler::gtNewTempAssign(unsigned tmp, GenTreePtr val)
var_types valTyp = val->TypeGet();
if (val->OperGet() == GT_LCL_VAR && lvaTable[val->gtLclVar.gtLclNum].lvNormalizeOnLoad())
{
- valTyp = lvaGetRealType(val->gtLclVar.gtLclNum);
- val = gtNewLclvNode(val->gtLclVar.gtLclNum, valTyp, val->gtLclVar.gtLclILoffs);
+ valTyp = lvaGetRealType(val->gtLclVar.gtLclNum);
+ val->gtType = valTyp;
}
var_types dstTyp = varDsc->TypeGet();
@@ -14108,7 +14493,7 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
// effect of this instruction, change it into a GT_LOCKADD node (the add only)
if (oper == GT_XADD)
{
- expr->gtOper = GT_LOCKADD;
+ expr->SetOperRaw(GT_LOCKADD);
expr->gtType = TYP_VOID;
}
@@ -14188,12 +14573,12 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
GenTreePtr args;
for (args = expr->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2)
{
- assert(args->IsList());
+ assert(args->OperIsList());
gtExtractSideEffList(args->Current(), pList, flags);
}
for (args = expr->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2)
{
- assert(args->IsList());
+ assert(args->OperIsList());
gtExtractSideEffList(args->Current(), pList, flags);
}
}
@@ -15356,11 +15741,18 @@ bool GenTree::isContained() const
return false;
}
+ // These nodes either produce a result in a register or set the flags register.
+ if (IsSIMDEqualityOrInequality())
+ {
+ return false;
+ }
+
// TODO-Cleanup : this is not clean, would be nice to have some way of marking this.
switch (OperGet())
{
case GT_STOREIND:
case GT_JTRUE:
+ case GT_JCC:
case GT_RETURN:
case GT_RETFILT:
case GT_STORE_LCL_FLD:
@@ -15381,7 +15773,9 @@ bool GenTree::isContained() const
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_SWITCH:
+#ifndef LEGACY_BACKEND
case GT_JMPTABLE:
+#endif
case GT_SWITCH_TABLE:
case GT_SWAP:
case GT_LCLHEAP:
@@ -15928,6 +16322,17 @@ void GenTree::ParseArrayAddress(
// TODO-Review: A NotAField here indicates a failure to properly maintain the field sequence
// See test case self_host_tests_x86\jit\regression\CLR-x86-JIT\v1-m12-beta2\ b70992\ b70992.exe
// Safest thing to do here is to drop back to MinOpts
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (comp->opts.optRepeat)
+ {
+ // We don't guarantee preserving these annotations through the entire optimizer, so
+ // just conservatively return null if under optRepeat.
+ *pArr = nullptr;
+ return;
+ }
+#endif // DEBUG
noway_assert(!"fldSeqIter is NotAField() in ParseArrayAddress");
}
@@ -16446,24 +16851,6 @@ bool GenTree::isCommutativeSIMDIntrinsic()
#endif // FEATURE_SIMD
//---------------------------------------------------------------------------------------
-// GenTreeArgList::Prepend:
-// Prepends an element to a GT_LIST.
-//
-// Arguments:
-// compiler - The compiler context.
-// element - The element to prepend.
-//
-// Returns:
-// The new head of the list.
-GenTreeArgList* GenTreeArgList::Prepend(Compiler* compiler, GenTree* element)
-{
- GenTreeArgList* head = compiler->gtNewListNode(element, this);
- head->gtFlags |= (gtFlags & GTF_LIST_AGGREGATE);
- gtFlags &= ~GTF_LIST_AGGREGATE;
- return head;
-}
-
-//---------------------------------------------------------------------------------------
// InitializeStructReturnType:
// Initialize the Return Type Descriptor for a method that returns a struct type
//
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 4efeeae620..4611d35465 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -68,7 +68,7 @@ enum SpecialCodeKind
DECLARE_TYPED_ENUM(genTreeOps, BYTE)
{
-#define GTNODE(en, sn, cm, ok) GT_##en,
+#define GTNODE(en, sn, st, cm, ok) GT_##en,
#include "gtlist.h"
GT_COUNT,
@@ -429,13 +429,15 @@ struct GenTree
noway_assert(FitsIn<unsigned char>(level));
gtFPlvl = (unsigned char)level;
}
-#else // FEATURE_STACK_FP_X87
+#else // FEATURE_STACK_FP_X87
+
void gtCopyFPlvl(GenTree* other)
{
}
void gtSetFPlvl(unsigned level)
{
}
+
#endif // FEATURE_STACK_FP_X87
//
@@ -564,7 +566,7 @@ public:
bool isContainedIntOrIImmed() const
{
- return isContained() && IsCnsIntOrI();
+ return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
}
bool isContainedFltOrDblImmed() const
@@ -766,15 +768,15 @@ public:
#ifdef LEGACY_BACKEND
#define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
#define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled
-#else
+#else // !LEGACY_BACKEND
#define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
-#endif // LEGACY_BACKEND
+#endif // !LEGACY_BACKEND
#define GTF_ZSF_SET 0x00000400 // the zero(ZF) and sign(SF) flags set to the operand
-#if FEATURE_SET_FLAGS
+
#define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags
// Use gtSetFlags() to check this flags
-#endif
+
#define GTF_IND_NONFAULTING 0x00000800 // An indir that cannot fault. GTF_SET_FLAGS is not used on indirs
#define GTF_MAKE_CSE 0x00002000 // Hoisted Expression: try hard to make this into CSE (see optPerformHoistExpr)
@@ -865,12 +867,18 @@ public:
#define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS
#define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an
// assignment; don't evaluate it independently.
-#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
- // alignment of 1 byte)
-#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
-#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
- // contribution to its argument).
-#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
+#define GTF_IND_REQ_ADDR_IN_REG GTF_IND_ASG_LHS // GT_IND -- requires its addr operand to be evaluated
+ // into a register. This flag is useful in cases where it
+ // is required to generate register indirect addressing mode.
+ // One such case is virtual stub calls on xarch. This is only
+ // valid in the backend, where GTF_IND_ASG_LHS is not necessary
+ // (all such indirections will be lowered to GT_STOREIND).
+#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
+ // alignment of 1 byte)
+#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
+#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
+ // contribution to its argument).
+#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
#define GTF_IND_FLAGS \
(GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \
@@ -925,11 +933,12 @@ public:
#define GTF_ICON_FIELD_OFF 0x08000000 // GT_CNS_INT -- constant is a field offset
+#define GTF_ICON_SIMD_COUNT 0x04000000 // GT_CNS_INT -- constant is Vector<T>.Count
+
#define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
// -- is a volatile block operation
#define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
// -- is an unaligned block operation
-#define GTF_BLK_INIT 0x01000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK -- is an init block operation
#define GTF_OVERFLOW 0x10000000 // GT_ADD, GT_SUB, GT_MUL, - Need overflow check
// GT_ASG_ADD, GT_ASG_SUB,
@@ -942,10 +951,13 @@ public:
#define GTF_ARRLEN_ARR_IDX 0x80000000 // GT_ARR_LENGTH -- Length which feeds into an array index expression
-#define GTF_LIST_AGGREGATE 0x80000000 // GT_LIST -- Indicates that this list should be treated as an
- // anonymous aggregate value (e.g. a multi-value argument).
+#define GTF_FIELD_LIST_HEAD 0x80000000 // GT_FIELD_LIST -- Indicates that this is the first field in a list of
+ // struct fields constituting a single call argument.
//----------------------------------------------------------------
+#define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12
+ // even if they have been retyped as SIMD16.
+//----------------------------------------------------------------
#define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler
#define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was subsituted
@@ -958,8 +970,10 @@ public:
#define GTF_DEBUG_NODE_MORPHED 0x00000001 // the node has been morphed (in the global morphing phase)
#define GTF_DEBUG_NODE_SMALL 0x00000002
#define GTF_DEBUG_NODE_LARGE 0x00000004
+#define GTF_DEBUG_NODE_CG_PRODUCED 0x00000008 // genProduceReg has been called on this node
+#define GTF_DEBUG_NODE_CG_CONSUMED 0x00000010 // genConsumeReg has been called on this node
-#define GTF_DEBUG_NODE_MASK 0x00000007 // These flags are all node (rather than operation) properties.
+#define GTF_DEBUG_NODE_MASK 0x0000001F // These flags are all node (rather than operation) properties.
#define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node
#endif // defined(DEBUG)
@@ -970,6 +984,8 @@ public:
#ifdef DEBUG
unsigned gtTreeID;
unsigned gtSeqNum; // liveness traversal order within the current statement
+
+ int gtUseNum; // use-ordered traversal within the function
#endif
static const unsigned short gtOperKindTable[];
@@ -1011,9 +1027,9 @@ public:
return gtType != TYP_VOID;
}
- if (gtOper == GT_LIST)
+ if (gtOper == GT_FIELD_LIST)
{
- return (gtFlags & GTF_LIST_AGGREGATE) != 0;
+ return (gtFlags & GTF_FIELD_LIST_HEAD) != 0;
}
return true;
@@ -1033,14 +1049,14 @@ public:
return IsNothingNode();
case GT_ARGPLACE:
- // ARGPLACE nodes may not be present in a block's LIR sequence, but they may
+ case GT_LIST:
+ // ARGPLACE and LIST nodes may not be present in a block's LIR sequence, but they may
// be present as children of an LIR node.
return (gtNext == nullptr) && (gtPrev == nullptr);
- case GT_LIST:
- // LIST nodes may only be present in an LIR sequence if they represent aggregates.
- // They are always allowed, however, as children of an LIR node.
- return ((gtFlags & GTF_LIST_AGGREGATE) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr));
+ case GT_FIELD_LIST:
+ // Only the head of the FIELD_LIST is present in the block's LIR sequence.
+ return (((gtFlags & GTF_FIELD_LIST_HEAD) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr)));
case GT_ADDR:
{
@@ -1130,6 +1146,21 @@ public:
return (gtOper == GT_LEA);
}
+ static bool OperIsInitVal(genTreeOps gtOper)
+ {
+ return (gtOper == GT_INIT_VAL);
+ }
+
+ bool OperIsInitVal() const
+ {
+ return OperIsInitVal(OperGet());
+ }
+
+ bool IsConstInitVal()
+ {
+ return (gtOper == GT_CNS_INT) || (OperIsInitVal() && (gtGetOp1()->gtOper == GT_CNS_INT));
+ }
+
bool OperIsBlkOp();
bool OperIsCopyBlkOp();
bool OperIsInitBlkOp();
@@ -1146,6 +1177,16 @@ public:
return OperIsBlk(OperGet());
}
+ static bool OperIsDynBlk(genTreeOps gtOper)
+ {
+ return ((gtOper == GT_DYN_BLK) || (gtOper == GT_STORE_DYN_BLK));
+ }
+
+ bool OperIsDynBlk() const
+ {
+ return OperIsDynBlk(OperGet());
+ }
+
static bool OperIsStoreBlk(genTreeOps gtOper)
{
return ((gtOper == GT_STORE_BLK) || (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK));
@@ -1206,7 +1247,7 @@ public:
return OperIsLocalRead(OperGet());
}
- bool OperIsCompare()
+ bool OperIsCompare() const
{
return (OperKind(gtOper) & GTK_RELOP) != 0;
}
@@ -1270,7 +1311,6 @@ public:
{
case GT_ADD_HI:
case GT_SUB_HI:
- case GT_MUL_HI:
case GT_DIV_HI:
case GT_MOD_HI:
return true;
@@ -1396,8 +1436,7 @@ public:
static bool OperIsStore(genTreeOps gtOper)
{
return (gtOper == GT_STOREIND || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD ||
- gtOper == GT_STORE_CLS_VAR || gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ ||
- gtOper == GT_STORE_DYN_BLK);
+ gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ || gtOper == GT_STORE_DYN_BLK);
}
static bool OperIsAtomicOp(genTreeOps gtOper)
@@ -1425,9 +1464,34 @@ public:
return OperIsSIMD(gtOper);
}
- bool OperIsAggregate()
+ bool OperIsFieldListHead()
+ {
+ return (gtOper == GT_FIELD_LIST) && ((gtFlags & GTF_FIELD_LIST_HEAD) != 0);
+ }
+
+ bool OperIsConditionalJump() const
+ {
+ return (gtOper == GT_JTRUE) || (gtOper == GT_JCC);
+ }
+
+ static bool OperIsBoundsCheck(genTreeOps op)
+ {
+ if (op == GT_ARR_BOUNDS_CHECK)
+ {
+ return true;
+ }
+#ifdef FEATURE_SIMD
+ if (op == GT_SIMD_CHK)
+ {
+ return true;
+ }
+#endif // FEATURE_SIMD
+ return false;
+ }
+
+ bool OperIsBoundsCheck() const
{
- return (gtOper == GT_LIST) && ((gtFlags & GTF_LIST_AGGREGATE) != 0);
+ return OperIsBoundsCheck(OperGet());
}
// Requires that "op" is an op= operator. Returns
@@ -1462,6 +1526,7 @@ public:
switch (gtOper)
{
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_INTRINSIC:
case GT_LEA:
#ifdef FEATURE_SIMD
@@ -1474,19 +1539,47 @@ public:
}
static inline bool RequiresNonNullOp2(genTreeOps oper);
- bool IsListForMultiRegArg();
+ bool IsValidCallArgument();
#endif // DEBUG
inline bool IsFPZero();
inline bool IsIntegralConst(ssize_t constVal);
+ inline bool IsIntegralConstVector(ssize_t constVal);
inline bool IsBoxedValue();
- bool IsList() const
+ inline bool IsSIMDEqualityOrInequality() const;
+
+ static bool OperIsList(genTreeOps gtOper)
{
return gtOper == GT_LIST;
}
+ bool OperIsList() const
+ {
+ return OperIsList(gtOper);
+ }
+
+ static bool OperIsFieldList(genTreeOps gtOper)
+ {
+ return gtOper == GT_FIELD_LIST;
+ }
+
+ bool OperIsFieldList() const
+ {
+ return OperIsFieldList(gtOper);
+ }
+
+ static bool OperIsAnyList(genTreeOps gtOper)
+ {
+ return OperIsList(gtOper) || OperIsFieldList(gtOper);
+ }
+
+ bool OperIsAnyList() const
+ {
+ return OperIsAnyList(gtOper);
+ }
+
inline GenTreePtr MoveNext();
inline GenTreePtr Current();
@@ -1508,6 +1601,8 @@ public:
// Get the parent of this node, and optionally capture the pointer to the child so that it can be modified.
GenTreePtr gtGetParent(GenTreePtr** parentChildPtrPtr);
+ void ReplaceOperand(GenTree** useEdge, GenTree* replacement);
+
inline GenTreePtr gtEffectiveVal(bool commaOnly = false);
// Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself
@@ -1536,7 +1631,13 @@ public:
public:
#if SMALL_TREE_NODES
static unsigned char s_gtNodeSizes[];
+#if NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS
+ static unsigned char s_gtTrueSizes[];
+#endif
+#if COUNT_AST_OPERS
+ static LONG s_gtNodeCounts[];
#endif
+#endif // SMALL_TREE_NODES
static void InitNodeSize();
@@ -1555,15 +1656,19 @@ public:
static bool Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK = false);
//---------------------------------------------------------------------
-#ifdef DEBUG
- //---------------------------------------------------------------------
+#if defined(DEBUG)
static const char* NodeName(genTreeOps op);
+#endif
+#if defined(DEBUG) || NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS
static const char* OpName(genTreeOps op);
+#endif
-//---------------------------------------------------------------------
+#if MEASURE_NODE_SIZE && SMALL_TREE_NODES
+ static const char* OpStructName(genTreeOps op);
#endif
+
//---------------------------------------------------------------------
bool IsNothingNode() const;
@@ -1583,6 +1688,7 @@ public:
// set gtOper and only keep GTF_COMMON_MASK flags
void ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN);
void ChangeOperUnchecked(genTreeOps oper);
+ void SetOperRaw(genTreeOps oper);
void ChangeType(var_types newType)
{
@@ -1597,6 +1703,20 @@ public:
}
}
+#if SMALL_TREE_NODES
+#if NODEBASH_STATS
+ static void RecordOperBashing(genTreeOps operOld, genTreeOps operNew);
+ static void ReportOperBashing(FILE* fp);
+#else
+ static void RecordOperBashing(genTreeOps operOld, genTreeOps operNew)
+ { /* do nothing */
+ }
+ static void ReportOperBashing(FILE* fp)
+ { /* do nothing */
+ }
+#endif
+#endif
+
bool IsLocal() const
{
return OperIsLocal(OperGet());
@@ -1777,6 +1897,14 @@ public:
bool gtOverflowEx() const;
bool gtSetFlags() const;
bool gtRequestSetFlags();
+
+ // Returns true if the codegen of this tree node
+ // sets ZF and SF flags.
+ bool gtSetZSFlags() const
+ {
+ return (gtFlags & GTF_ZSF_SET) != 0;
+ }
+
#ifdef DEBUG
bool gtIsValid64RsltMul();
static int gtDispFlags(unsigned flags, unsigned debugFlags);
@@ -1827,10 +1955,10 @@ public:
// Returns an iterator that will produce the use edge to each operand of this node. Differs
// from the sequence of nodes produced by a loop over `GetChild` in its handling of call, phi,
// and block op nodes.
- GenTreeUseEdgeIterator GenTree::UseEdgesBegin();
- GenTreeUseEdgeIterator GenTree::UseEdgesEnd();
+ GenTreeUseEdgeIterator UseEdgesBegin();
+ GenTreeUseEdgeIterator UseEdgesEnd();
- IteratorPair<GenTreeUseEdgeIterator> GenTree::UseEdges();
+ IteratorPair<GenTreeUseEdgeIterator> UseEdges();
// Returns an iterator that will produce each operand of this node. Differs from the sequence
// of nodes produced by a loop over `GetChild` in its handling of call, phi, and block op
@@ -1866,6 +1994,10 @@ public:
gtFlags &= ~GTF_REUSE_REG_VAL;
}
+#if MEASURE_NODE_SIZE
+ static void DumpNodeSizes(FILE* fp);
+#endif
+
#ifdef DEBUG
private:
@@ -1931,7 +2063,7 @@ class GenTreeUseEdgeIterator final
#ifdef FEATURE_SIMD
void MoveToNextSIMDUseEdge();
#endif
- void MoveToNextAggregateUseEdge();
+ void MoveToNextFieldUseEdge();
public:
GenTreeUseEdgeIterator();
@@ -2128,7 +2260,7 @@ struct GenTreeIntConCommon : public GenTree
}
bool ImmedValNeedsReloc(Compiler* comp);
- bool GenTreeIntConCommon::ImmedValCanBeFolded(Compiler* comp, genTreeOps op);
+ bool ImmedValCanBeFolded(Compiler* comp, genTreeOps op);
#ifdef _TARGET_XARCH_
bool FitsInAddrBase(Compiler* comp);
@@ -2629,18 +2761,13 @@ struct GenTreeField : public GenTree
// method names for the arguments.
struct GenTreeArgList : public GenTreeOp
{
- bool IsAggregate() const
- {
- return (gtFlags & GTF_LIST_AGGREGATE) != 0;
- }
-
GenTreePtr& Current()
{
return gtOp1;
}
GenTreeArgList*& Rest()
{
- assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_LIST);
+ assert(gtOp2 == nullptr || gtOp2->OperIsAnyList());
return *reinterpret_cast<GenTreeArgList**>(&gtOp2);
}
@@ -2654,20 +2781,68 @@ struct GenTreeArgList : public GenTreeOp
{
}
- GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(GT_LIST, TYP_VOID, arg, rest)
+ GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeArgList(GT_LIST, arg, rest)
{
- // With structs passed in multiple args we could have an arg
- // GT_LIST containing a list of LCL_FLDs, see IsListForMultiRegArg()
- //
- assert((arg != nullptr) && ((!arg->IsList()) || (arg->IsListForMultiRegArg())));
+ }
+
+ GenTreeArgList(genTreeOps oper, GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(oper, TYP_VOID, arg, rest)
+ {
+ assert(OperIsAnyList(oper));
+ assert((arg != nullptr) && arg->IsValidCallArgument());
gtFlags |= arg->gtFlags & GTF_ALL_EFFECT;
if (rest != nullptr)
{
gtFlags |= rest->gtFlags & GTF_ALL_EFFECT;
}
}
+};
+
+// Represents a list of fields constituting a struct, when it is passed as an argument.
+// The first field of the struct is marked with the GTF_FIELD_LIST_HEAD flag, and
+// in LIR form it is the only member of the list that is threaded into the execution
+// order.
+// It differs from the GenTreeArgList in a couple of ways:
+// - The entire list represents a single argument.
+// - It contains additional fields to provide the offset and type of the field.
+//
+struct GenTreeFieldList : public GenTreeArgList
+{
+ unsigned gtFieldOffset;
+ var_types gtFieldType;
+
+ bool IsFieldListHead() const
+ {
+ return (gtFlags & GTF_FIELD_LIST_HEAD) != 0;
+ }
- GenTreeArgList* Prepend(Compiler* compiler, GenTree* element);
+#if DEBUGGABLE_GENTREE
+ GenTreeFieldList() : GenTreeArgList()
+ {
+ }
+#endif
+
+ GenTreeFieldList*& Rest()
+ {
+ assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_FIELD_LIST);
+ return *reinterpret_cast<GenTreeFieldList**>(&gtOp2);
+ }
+
+ GenTreeFieldList(GenTreePtr arg, unsigned fieldOffset, var_types fieldType, GenTreeFieldList* prevList)
+ : GenTreeArgList(GT_FIELD_LIST, arg, nullptr)
+ {
+ // While GT_FIELD_LIST can be in a GT_LIST, GT_FIELD_LISTs cannot be nested or have GT_LISTs.
+ assert(!arg->OperIsAnyList());
+ gtFieldOffset = fieldOffset;
+ gtFieldType = fieldType;
+ if (prevList == nullptr)
+ {
+ gtFlags |= GTF_FIELD_LIST_HEAD;
+ }
+ else
+ {
+ prevList->gtOp2 = this;
+ }
+ }
};
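
To illustrate the shape of the GT_FIELD_LIST chain described above (the first node carries the head flag, later nodes are linked through the rest pointer, and each node records a field offset and type), here is a minimal standalone sketch. The types and names below are simplified stand-ins invented for the example, not the real GenTree definitions.

#include <cassert>
#include <cstdio>

// Simplified stand-ins for the JIT types (illustration only).
enum class VarType { Int, Float, Ref };

struct FieldListNode
{
    int            fieldOffset;
    VarType        fieldType;
    bool           isHead;   // mirrors GTF_FIELD_LIST_HEAD on the first node
    FieldListNode* rest;     // mirrors gtOp2 linking to the next node

    FieldListNode(int offset, VarType type, FieldListNode* prev)
        : fieldOffset(offset), fieldType(type), isHead(prev == nullptr), rest(nullptr)
    {
        // As in the constructor above: a new node either starts the list
        // (and becomes the head) or is appended to the previous node.
        if (prev != nullptr)
        {
            prev->rest = this;
        }
    }
};

int main()
{
    // Build a three-field list for a hypothetical struct { int; int; float; }.
    FieldListNode f0(0, VarType::Int,   nullptr); // head
    FieldListNode f1(4, VarType::Int,   &f0);
    FieldListNode f2(8, VarType::Float, &f1);

    assert(f0.isHead && !f1.isHead && !f2.isHead);

    for (FieldListNode* n = &f0; n != nullptr; n = n->rest)
    {
        std::printf("field at offset %d\n", n->fieldOffset);
    }
    return 0;
}
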
// There was quite a bit of confusion in the code base about which of gtOp1 and gtOp2 was the
@@ -3360,8 +3535,13 @@ struct GenTreeCall final : public GenTree
bool IsHelperCall(Compiler* compiler, unsigned helper) const;
+ void ReplaceCallOperand(GenTree** operandUseEdge, GenTree* replacement);
+
+ bool AreArgsComplete() const;
+
GenTreeCall(var_types type) : GenTree(GT_CALL, type)
{
+ fgArgInfo = nullptr;
}
#if DEBUGGABLE_GENTREE
GenTreeCall() : GenTree()
@@ -4017,6 +4197,19 @@ struct GenTreeObj : public GenTreeBlk
// Let's assert it just to be safe.
noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize);
}
+ else
+ {
+ genTreeOps newOper = GT_BLK;
+ if (gtOper == GT_STORE_OBJ)
+ {
+ newOper = GT_STORE_BLK;
+ }
+ else
+ {
+ assert(gtOper == GT_OBJ);
+ }
+ SetOper(newOper);
+ }
}
void CopyGCInfo(GenTreeObj* srcObj)
@@ -4068,6 +4261,8 @@ public:
GenTreeDynBlk(GenTreePtr addr, GenTreePtr dynamicSize)
: GenTreeBlk(GT_DYN_BLK, TYP_STRUCT, addr, 0), gtDynamicSize(dynamicSize), gtEvalSizeFirst(false)
{
+ // Conservatively the 'addr' could be null or point into the global heap.
+ gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
gtFlags |= (dynamicSize->gtFlags & GTF_ALL_EFFECT);
}
@@ -4198,10 +4393,7 @@ struct GenTreeStmt : public GenTree
GenTreePtr gtStmtExpr; // root of the expression tree
GenTreePtr gtStmtList; // first node (for forward walks)
InlineContext* gtInlineContext; // The inline context for this statement.
-
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
- IL_OFFSETX gtStmtILoffsx; // instr offset (if available)
-#endif
+ IL_OFFSETX gtStmtILoffsx; // instr offset (if available)
#ifdef DEBUG
IL_OFFSET gtStmtLastILoffs; // instr offset at end of stmt
@@ -4240,9 +4432,7 @@ struct GenTreeStmt : public GenTree
, gtStmtExpr(expr)
, gtStmtList(nullptr)
, gtInlineContext(nullptr)
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
, gtStmtILoffsx(offset)
-#endif
#ifdef DEBUG
, gtStmtLastILoffs(BAD_IL_OFFSET)
#endif
@@ -4350,20 +4540,19 @@ struct GenTreePutArgStk : public GenTreeUnOp
GenTreePutArgStk(genTreeOps oper,
var_types type,
- unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
+ PUT_STRUCT_ARG_STK_ONLY_ARG(bool isStruct),
bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
, putInIncomingArgArea(_putInIncomingArgArea)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- , gtPutArgStkKind(PutArgStkKindInvalid)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ , gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
- , gtIsStruct(isStruct)
, gtNumberReferenceSlots(0)
, gtGcPtrs(nullptr)
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
{
#ifdef DEBUG
gtCall = callNode;
@@ -4373,20 +4562,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
GenTreePutArgStk(genTreeOps oper,
var_types type,
GenTreePtr op1,
- unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots),
bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
, putInIncomingArgArea(_putInIncomingArgArea)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- , gtPutArgStkKind(PutArgStkKindInvalid)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ , gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
- , gtIsStruct(isStruct)
, gtNumberReferenceSlots(0)
, gtGcPtrs(nullptr)
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
{
#ifdef DEBUG
gtCall = callNode;
@@ -4397,18 +4584,16 @@ struct GenTreePutArgStk : public GenTreeUnOp
GenTreePutArgStk(genTreeOps oper,
var_types type,
- unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
- DEBUGARG(bool largeNode = false))
+ unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
+ DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- , gtPutArgStkKind(PutArgStkKindInvalid)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ , gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
- , gtIsStruct(isStruct)
, gtNumberReferenceSlots(0)
, gtGcPtrs(nullptr)
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
{
#ifdef DEBUG
gtCall = callNode;
@@ -4418,18 +4603,16 @@ struct GenTreePutArgStk : public GenTreeUnOp
GenTreePutArgStk(genTreeOps oper,
var_types type,
GenTreePtr op1,
- unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
- DEBUGARG(bool largeNode = false))
+ unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
+ DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- , gtPutArgStkKind(PutArgStkKindInvalid)
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ , gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
- , gtIsStruct(isStruct)
, gtNumberReferenceSlots(0)
, gtGcPtrs(nullptr)
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
{
#ifdef DEBUG
gtCall = callNode;
@@ -4442,14 +4625,14 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
return gtNumSlots * TARGET_POINTER_SIZE;
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
// setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
//
@@ -4471,27 +4654,32 @@ struct GenTreePutArgStk : public GenTreeUnOp
gtNumberReferenceSlots = numPointers;
gtGcPtrs = pointers;
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
#ifdef DEBUG
GenTreePtr gtCall; // the call node to which this argument belongs
#endif
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
// Instruction selection: during codegen time, what code sequence we will be using
// to encode this operation.
+ // TODO-Throughput: The following information should be obtained from the child
+ // block node.
- enum PutArgStkKind : __int8{
- PutArgStkKindInvalid, PutArgStkKindRepInstr, PutArgStkKindUnroll,
+ enum class Kind : __int8{
+ Invalid, RepInstr, Unroll, Push, PushAllSlots,
};
- PutArgStkKind gtPutArgStkKind;
+ Kind gtPutArgStkKind;
+ bool isPushKind()
+ {
+ return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots);
+ }
unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
- bool gtIsStruct; // This stack arg is a struct.
unsigned gtNumberReferenceSlots; // Number of reference slots.
BYTE* gtGcPtrs; // gcPointers
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
#if DEBUGGABLE_GENTREE
GenTreePutArgStk() : GenTreeUnOp()
@@ -4644,6 +4832,23 @@ struct GenTreeAllocObj final : public GenTreeUnOp
#endif
};
+struct GenTreeJumpCC final : public GenTree
+{
+ genTreeOps gtCondition; // any relop
+
+ GenTreeJumpCC(genTreeOps condition)
+ : GenTree(GT_JCC, TYP_VOID DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition)
+ {
+ assert(OperIsCompare(condition));
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeJumpCC() : GenTree()
+ {
+ }
+#endif // DEBUGGABLE_GENTREE
+};
+
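
As a rough illustration of what the new GT_JCC node models (a jump that consumes condition flags produced by an earlier node, carrying only the relop kind it tests), here is a simplified standalone sketch. The flags structure and condition enum are invented for the example and do not reflect the JIT's actual lowering or the exact x86 flag semantics.

#include <cassert>
#include <cstdio>

enum Condition { COND_EQ, COND_NE, COND_LT, COND_GE };

// Hypothetical flags produced once by a compare.
struct Flags
{
    bool zf; // "equal"
    bool lt; // "less than" (simplified; real hardware derives this from SF/OF)
};

Flags Compare(int a, int b)
{
    return Flags{ a == b, a < b };
}

// A JCC-style consumer: it never sees the operands, only which condition
// of the already-computed flags it should test.
bool Jcc(Condition cond, const Flags& f)
{
    switch (cond)
    {
        case COND_EQ: return f.zf;
        case COND_NE: return !f.zf;
        case COND_LT: return f.lt;
        case COND_GE: return !f.lt;
    }
    return false;
}

int main()
{
    Flags f = Compare(3, 7);    // the compare sets the flags once...
    assert(Jcc(COND_LT, f));    // ...and JCC-style jumps test them without re-comparing
    assert(!Jcc(COND_EQ, f));
    std::printf("ok\n");
    return 0;
}
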
//------------------------------------------------------------------------
// Deferred inline functions of GenTree -- these need the subtypes above to
// be defined already.
@@ -4673,34 +4878,31 @@ inline bool GenTree::OperIsDynBlkOp()
return false;
}
-inline bool GenTree::OperIsCopyBlkOp()
+inline bool GenTree::OperIsInitBlkOp()
{
- if (gtOper == GT_ASG)
+ if (!OperIsBlkOp())
{
- return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) == 0));
+ return false;
}
#ifndef LEGACY_BACKEND
- else if (OperIsStoreBlk())
- {
- return ((gtFlags & GTF_BLK_INIT) == 0);
- }
-#endif
- return false;
-}
-
-inline bool GenTree::OperIsInitBlkOp()
-{
+ GenTree* src;
if (gtOper == GT_ASG)
{
- return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) != 0));
+ src = gtGetOp2();
}
-#ifndef LEGACY_BACKEND
- else if (OperIsStoreBlk())
+ else
{
- return ((gtFlags & GTF_BLK_INIT) != 0);
+ src = AsBlk()->Data()->gtSkipReloadOrCopy();
}
-#endif
- return false;
+#else // LEGACY_BACKEND
+ GenTree* src = gtGetOp2();
+#endif // LEGACY_BACKEND
+ return src->OperIsInitVal() || src->OperIsConst();
+}
+
+inline bool GenTree::OperIsCopyBlkOp()
+{
+ return OperIsBlkOp() && !OperIsInitBlkOp();
}
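
The classification above reduces to a simple rule: a block-op whose source operand is an init value or a constant is an init block, and every other block-op is a copy block. A small standalone sketch of that rule follows; the node kinds are invented for the example and are not the real oper enum.

#include <cassert>

enum class Oper { Const, InitVal, LclVar, Indir };

struct Node
{
    Oper  oper;
    bool  isBlockOp; // whether this node is a block assignment/store at all
    Node* src;       // data operand for block ops
};

bool IsInitBlkOp(const Node& n)
{
    if (!n.isBlockOp)
    {
        return false;
    }
    // Init if the data operand is an init value or a constant.
    return (n.src->oper == Oper::InitVal) || (n.src->oper == Oper::Const);
}

bool IsCopyBlkOp(const Node& n)
{
    // A block op that is not an init block is a copy block.
    return n.isBlockOp && !IsInitBlkOp(n);
}

int main()
{
    Node zero{ Oper::Const, false, nullptr };
    Node local{ Oper::LclVar, false, nullptr };
    Node initStore{ Oper::Indir, true, &zero };
    Node copyStore{ Oper::Indir, true, &local };

    assert(IsInitBlkOp(initStore) && !IsCopyBlkOp(initStore));
    assert(IsCopyBlkOp(copyStore) && !IsInitBlkOp(copyStore));
    return 0;
}
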
//------------------------------------------------------------------------
@@ -4748,34 +4950,63 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal)
return false;
}
+//-------------------------------------------------------------------
+// IsIntegralConstVector: returns true if this is a SIMD vector
+// with all its elements equal to an integral constant.
+//
+// Arguments:
+// constVal - const value of vector element
+//
+// Returns:
+// True if this represents an integral const SIMD vector.
+//
+inline bool GenTree::IsIntegralConstVector(ssize_t constVal)
+{
+#ifdef FEATURE_SIMD
+ // SIMDIntrinsicInit intrinsic with a const value as initializer
+ // represents a const vector.
+ if ((gtOper == GT_SIMD) && (gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInit) && gtGetOp1()->IsIntegralConst(constVal))
+ {
+ assert(varTypeIsIntegral(gtSIMD.gtSIMDBaseType));
+ assert(gtGetOp2() == nullptr);
+ return true;
+ }
+#endif
+
+ return false;
+}
+
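
In other words, a vector built by a broadcast-style init from an integral constant is treated as an integral-constant vector. Below is a minimal standalone analogue of that check; the VecNode type is invented for the example and is not the JIT's SIMD node.

#include <cassert>

// A toy "vector node": either a broadcast-style init from one constant
// (the analogue of SIMDIntrinsicInit) or something else entirely.
struct VecNode
{
    bool      isBroadcastInit;
    long long initValue; // the broadcast constant, when isBroadcastInit is true
};

bool IsIntegralConstVector(const VecNode& v, long long constVal)
{
    // A broadcast init from the requested constant means every lane equals constVal.
    return v.isBroadcastInit && (v.initValue == constVal);
}

int main()
{
    VecNode zeros{ true, 0 };   // e.g. a vector initialized to all zeroes
    VecNode loaded{ false, 0 }; // e.g. a vector loaded from memory

    assert(IsIntegralConstVector(zeros, 0));
    assert(!IsIntegralConstVector(zeros, 1));
    assert(!IsIntegralConstVector(loaded, 0));
    return 0;
}
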
inline bool GenTree::IsBoxedValue()
{
assert(gtOper != GT_BOX || gtBox.BoxOp() != nullptr);
return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE);
}
+inline bool GenTree::IsSIMDEqualityOrInequality() const
+{
+#ifdef FEATURE_SIMD
+ if (gtOper == GT_SIMD)
+ {
+ // Has to cast away const-ness since AsSIMD() method is non-const.
+ GenTreeSIMD* simdNode = const_cast<GenTree*>(this)->AsSIMD();
+ return (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
+ }
+#endif
+
+ return false;
+}
+
inline GenTreePtr GenTree::MoveNext()
{
- assert(IsList());
+ assert(OperIsAnyList());
return gtOp.gtOp2;
}
#ifdef DEBUG
//------------------------------------------------------------------------
-// IsListForMultiRegArg: Given an GenTree node that represents an argument
-// enforce (or don't enforce) the following invariant.
-//
-// For LEGACY_BACKEND or architectures that don't support MultiReg args
-// we don't allow a GT_LIST at all.
-//
-// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is
-// allowed but every element must be a GT_LCL_FLD.
-//
-// For the future targets that allow for Multireg args (and this includes
-// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these
-// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs,
-// but could be changed into constants or GT_COMMA trees by the later
-// optimization phases.
+// IsValidCallArgument: Given a GenTree node that represents an argument
+// enforce (or don't enforce) the following invariant.
//
// Arguments:
// instance method for a GenTree node
@@ -4784,33 +5015,46 @@ inline GenTreePtr GenTree::MoveNext()
// true: the GenTree node is accepted as a valid argument
// false: the GenTree node is not accepted as a valid argument
//
-inline bool GenTree::IsListForMultiRegArg()
+// Notes:
+// For targets that don't support arguments as a list of fields, we do not support GT_FIELD_LIST.
+//
+// Currently for AMD64 UNIX we allow a limited case where a GT_FIELD_LIST is
+// allowed but every element must be a GT_LCL_FLD.
+//
+// For the future targets that allow for Multireg args (and this includes the current ARM64 target),
+// or that allow for passing promoted structs, we allow a GT_FIELD_LIST of arbitrary nodes.
+// These would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs,
+// but could be changed into constants or GT_COMMA trees by the later
+// optimization phases.
+
+inline bool GenTree::IsValidCallArgument()
{
- if (!IsList())
+ if (OperIsList())
{
- // We don't have a GT_LIST, so just return true.
- return true;
+ // GT_FIELD_LIST is the only list allowed.
+ return false;
}
- else // We do have a GT_LIST
+ if (OperIsFieldList())
{
-#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS
-
- // Not allowed to have a GT_LIST for an argument
- // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS
+#if defined(LEGACY_BACKEND) || (!FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK)
+ // Not allowed to have a GT_FIELD_LIST for an argument
+ // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK
return false;
-#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS
+#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes
+ // For UNIX ABI we currently only allow a GT_FIELD_LIST of GT_LCL_FLDs nodes
GenTree* gtListPtr = this;
while (gtListPtr != nullptr)
{
// ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List
// Note the list as currently created is malformed, as the last entry is a nullptr
if (gtListPtr->Current() == nullptr)
+ {
break;
+ }
// Only a list of GT_LCL_FLDs is allowed
if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
@@ -4821,25 +5065,27 @@ inline bool GenTree::IsListForMultiRegArg()
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // Note that for non-UNIX ABI the GT_LIST may contain any node
+ // Note that for non-UNIX ABI the GT_FIELD_LIST may contain any node
//
- // We allow this GT_LIST as an argument
+ // We allow this GT_FIELD_LIST as an argument
return true;
-#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS
+#endif // FEATURE_MULTIREG_ARGS
}
+ // We don't have either kind of list, so it satisfies the invariant.
+ return true;
}
#endif // DEBUG
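
A compressed standalone rendering of the same invariant may make the check easier to follow: plain lists are rejected outright, field lists are accepted only when the target supports them, and an optional restriction limits field-list elements to local fields. The node kinds are hypothetical, and the target-specific #ifdefs are collapsed into boolean parameters.

#include <cassert>
#include <vector>

enum class Kind { LclFld, LclVar, List, FieldList };

struct Arg
{
    Kind              kind;
    std::vector<Kind> elements; // element kinds when this is a (field) list
};

// 'fieldListsSupported' stands in for FEATURE_MULTIREG_ARGS / FEATURE_PUT_STRUCT_ARG_STK,
// and 'lclFldOnly' for the UNIX AMD64 restriction described above.
bool IsValidCallArgument(const Arg& arg, bool fieldListsSupported, bool lclFldOnly)
{
    if (arg.kind == Kind::List)
    {
        return false; // a plain list is never a valid argument
    }
    if (arg.kind == Kind::FieldList)
    {
        if (!fieldListsSupported)
        {
            return false;
        }
        if (lclFldOnly)
        {
            for (Kind k : arg.elements)
            {
                if (k != Kind::LclFld)
                {
                    return false;
                }
            }
        }
        return true;
    }
    return true; // any non-list node satisfies the invariant
}

int main()
{
    Arg plain{ Kind::LclVar, {} };
    Arg fields{ Kind::FieldList, { Kind::LclFld, Kind::LclFld } };
    Arg badFields{ Kind::FieldList, { Kind::LclVar } };

    assert(IsValidCallArgument(plain, false, false));
    assert(IsValidCallArgument(fields, true, true));
    assert(!IsValidCallArgument(badFields, true, true));
    assert(!IsValidCallArgument(fields, false, false));
    return 0;
}
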
inline GenTreePtr GenTree::Current()
{
- assert(IsList());
+ assert(OperIsAnyList());
return gtOp.gtOp1;
}
inline GenTreePtr* GenTree::pCurrent()
{
- assert(IsList());
+ assert(OperIsAnyList());
return &(gtOp.gtOp1);
}
@@ -4917,23 +5163,22 @@ inline GenTreePtr GenTree::gtGetOp2()
inline GenTreePtr GenTree::gtEffectiveVal(bool commaOnly)
{
- switch (gtOper)
+ GenTree* effectiveVal = this;
+ for (;;)
{
- case GT_COMMA:
- return gtOp.gtOp2->gtEffectiveVal(commaOnly);
-
- case GT_NOP:
- if (!commaOnly && gtOp.gtOp1 != nullptr)
- {
- return gtOp.gtOp1->gtEffectiveVal();
- }
- break;
-
- default:
- break;
+ if (effectiveVal->gtOper == GT_COMMA)
+ {
+ effectiveVal = effectiveVal->gtOp.gtOp2;
+ }
+ else if (!commaOnly && (effectiveVal->gtOper == GT_NOP) && (effectiveVal->gtOp.gtOp1 != nullptr))
+ {
+ effectiveVal = effectiveVal->gtOp.gtOp1;
+ }
+ else
+ {
+ return effectiveVal;
+ }
}
-
- return this;
}
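
The rewritten gtEffectiveVal above replaces recursion with a loop that peels comma and nop wrappers until a real value node is reached. The same pattern on a toy node type (invented for the example, not the real GenTree):

#include <cassert>

enum class Op { Comma, Nop, Const };

struct Node
{
    Op    op;
    Node* op1;   // for Nop: the wrapped node; for Comma: the side-effect part
    Node* op2;   // for Comma: the value-producing part
    int   value; // for Const
};

// Iteratively skip wrappers: a comma's value is its second operand,
// and a non-empty nop simply forwards its single operand.
Node* EffectiveVal(Node* n, bool commaOnly = false)
{
    for (;;)
    {
        if (n->op == Op::Comma)
        {
            n = n->op2;
        }
        else if (!commaOnly && (n->op == Op::Nop) && (n->op1 != nullptr))
        {
            n = n->op1;
        }
        else
        {
            return n;
        }
    }
}

int main()
{
    Node c{ Op::Const, nullptr, nullptr, 42 };
    Node nop{ Op::Nop, &c, nullptr, 0 };
    Node sideEffect{ Op::Const, nullptr, nullptr, 0 };
    Node comma{ Op::Comma, &sideEffect, &nop, 0 };

    assert(EffectiveVal(&comma)->value == 42);          // comma -> nop -> const
    assert(EffectiveVal(&comma, true)->op == Op::Nop);  // commaOnly stops at the nop
    return 0;
}
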
inline GenTree* GenTree::gtSkipReloadOrCopy()
diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp
index 43cbb892e9..9255d8fd36 100644
--- a/src/jit/gschecks.cpp
+++ b/src/jit/gschecks.cpp
@@ -40,9 +40,9 @@ const unsigned NO_SHADOW_COPY = UINT_MAX;
* The current function has an unsafe buffer on the stack. Search for vulnerable
* parameters which could be used to modify a code address and take over the process
* in the case of a buffer overrun. Create a safe local copy for each vulnerable parameter,
- * which will be allocated bellow the unsafe buffer. Change uses of the param to the
+ * which will be allocated below the unsafe buffer. Change uses of the param to the
* shadow copy.
- *
+ *
* A pointer under indirection is considered vulnerable. A malicious user could read from
* protected memory or write to it. If a parameter is assigned/computed into another variable,
* and is a pointer (i.e., under indirection), then we consider the variable to be part of the
@@ -58,7 +58,7 @@ void Compiler::gsCopyShadowParams()
// Allocate array for shadow param info
gsShadowVarInfo = new (this, CMK_Unknown) ShadowParamVarInfo[lvaCount]();
- // Find groups of variables assigned to each other, and also
+ // Find groups of variables assigned to each other, and also
// tracks variables which are dereferenced and marks them as ptrs.
// Look for assignments to *p, and ptrs passed to functions
if (gsFindVulnerableParams())
@@ -83,7 +83,7 @@ struct MarkPtrsInfo
{
printf(
"[MarkPtrsInfo] = {comp = %p, lvAssignDef = %d, isAssignSrc = %d, isUnderIndir = %d, skipNextNode = %d}\n",
- comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode);
+ comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode);
}
#endif
};
@@ -129,7 +129,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
newState.isUnderIndir = true;
{
newState.skipNextNode = true; // Don't have to worry about which kind of node we're dealing with
- comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void *)&newState);
+ comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
}
return WALK_SKIP_SUBTREES;
@@ -160,50 +160,50 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
{
shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectSet(lclNum);
}
-
+
// Point both to the same bit vector
shadowVarInfo[lclNum].assignGroup = shadowVarInfo[pState->lvAssignDef].assignGroup;
}
else if (shadowVarInfo[lclNum].assignGroup)
{
shadowVarInfo[lclNum].assignGroup->bitVectSet(pState->lvAssignDef);
-
+
// Point both to the same bit vector
shadowVarInfo[pState->lvAssignDef].assignGroup = shadowVarInfo[lclNum].assignGroup;
}
else
{
- FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp);
+ FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp);
// (shadowVarInfo[pState->lvAssignDef] == NULL && shadowVarInfo[lclNew] == NULL);
// Neither of them has an assign group yet. Make a new one.
shadowVarInfo[pState->lvAssignDef].assignGroup = bv;
- shadowVarInfo[lclNum].assignGroup = bv;
+ shadowVarInfo[lclNum].assignGroup = bv;
bv->bitVectSet(pState->lvAssignDef);
bv->bitVectSet(lclNum);
}
}
return WALK_CONTINUE;
-
+
// Calls - Mark arg variables
case GT_CALL:
newState.isUnderIndir = false;
- newState.isAssignSrc = false;
+ newState.isAssignSrc = false;
{
if (tree->gtCall.gtCallObjp)
{
newState.isUnderIndir = true;
- comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState);
}
for (GenTreeArgList* args = tree->gtCall.gtCallArgs; args; args = args->Rest())
{
- comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
}
for (GenTreeArgList* args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
{
- comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
}
if (tree->gtCall.gtCallType == CT_INDIRECT)
@@ -213,7 +213,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
// A function pointer is treated like a write-through pointer since
// it controls what code gets executed, and so indirectly can cause
// a write to memory.
- comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState);
}
}
return WALK_SKIP_SUBTREES;
@@ -223,7 +223,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
// We'll assume p in "**p = " can be vulnerable because by changing 'p', someone
// could control where **p stores to.
{
- comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
}
return WALK_SKIP_SUBTREES;
@@ -251,7 +251,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
{
// Walk dst side
comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
-
+
// Now handle src side
isLocVar = tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR;
isLocFld = tree->gtOp.gtOp1->OperGet() == GT_LCL_FLD;
@@ -262,7 +262,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg
newState.lvAssignDef = lclNum;
newState.isAssignSrc = true;
}
-
+
comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
}
@@ -377,7 +377,7 @@ bool Compiler::gsFindVulnerableParams()
*/
void Compiler::gsParamsToShadows()
{
- // Cache old count since we'll add new variables, and
+ // Cache old count since we'll add new variables, and
// gsShadowVarInfo will not grow to accommodate the new ones.
UINT lvaOldCount = lvaCount;
@@ -513,7 +513,7 @@ void Compiler::gsParamsToShadows()
GenTreePtr src = gtNewLclvNode(shadowVar, lvaTable[shadowVar].TypeGet());
GenTreePtr dst = gtNewLclvNode(lclNum, varDsc->TypeGet());
-
+
src->gtFlags |= GTF_DONT_CSE;
dst->gtFlags |= GTF_DONT_CSE;
@@ -530,7 +530,7 @@ void Compiler::gsParamsToShadows()
{
opAssign = gtNewAssignNode(dst, src);
}
-
+
(void)fgInsertStmtNearEnd(block, fgMorphTree(opAssign));
}
}
@@ -552,8 +552,8 @@ Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalk
{
asg = tree; // "asg" is the assignment tree.
tree = tree->gtOp.gtOp1; // "tree" is the local var tree at the left-hand size of the assignment.
- }
-
+ }
+
if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD)
{
UINT paramNum = tree->gtLclVarCommon.gtLclNum;
@@ -571,7 +571,7 @@ Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalk
if (varTypeIsSmall(comp->lvaTable[paramNum].TypeGet()))
{
tree->gtType = TYP_INT;
- if (asg)
+ if (asg)
{
// If this is an assignment tree, propagate the type to it as well.
asg->gtType = TYP_INT;
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index a03bcfe4b0..92265a7359 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -9,245 +9,270 @@
#endif
/*****************************************************************************/
//
-// Node enum
-// , "Node name"
-// ,commutative
-// ,operKind
+// Node enum
+// ,"Node name"
+// ,GenTree struct flavor
+// ,commutative
+// ,operKind
-GTNODE(NONE , "<none>" ,0,GTK_SPECIAL)
+GTNODE(NONE , "<none>" ,char ,0,GTK_SPECIAL)
//-----------------------------------------------------------------------------
// Leaf nodes (i.e. these nodes have no sub-operands):
//-----------------------------------------------------------------------------
-GTNODE(LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable
-GTNODE(LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable
-GTNODE(LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable
-GTNODE(LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable
-GTNODE(STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable
-GTNODE(STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable
-GTNODE(CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block
-GTNODE(LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target
-GTNODE(FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function
-GTNODE(RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate
+GTNODE(LCL_VAR , "lclVar" ,GenTreeLclVar ,0,GTK_LEAF|GTK_LOCAL) // local variable
+GTNODE(LCL_FLD , "lclFld" ,GenTreeLclFld ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable
+GTNODE(LCL_VAR_ADDR , "&lclVar" ,GenTreeLclVar ,0,GTK_LEAF) // address of local variable
+GTNODE(LCL_FLD_ADDR , "&lclFld" ,GenTreeLclFld ,0,GTK_LEAF) // address of field in a non-primitive variable
+GTNODE(STORE_LCL_VAR , "st.lclVar" ,GenTreeLclVar ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable
+GTNODE(STORE_LCL_FLD , "st.lclFld" ,GenTreeLclFld ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable
+GTNODE(CATCH_ARG , "catchArg" ,GenTree ,0,GTK_LEAF) // Exception object in a catch block
+GTNODE(LABEL , "codeLabel" ,GenTreeLabel ,0,GTK_LEAF) // Jump-target
+GTNODE(FTN_ADDR , "ftnAddr" ,GenTreeFptrVal ,0,GTK_LEAF) // Address of a function
+GTNODE(RET_EXPR , "retExpr" ,GenTreeRetExpr ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate
//-----------------------------------------------------------------------------
// Constant nodes:
//-----------------------------------------------------------------------------
-GTNODE(CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST)
-GTNODE(CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST)
-GTNODE(CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST)
-GTNODE(CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_INT , "const" ,GenTreeIntCon ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_LNG , "lconst" ,GenTreeLngCon ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_DBL , "dconst" ,GenTreeDblCon ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_STR , "sconst" ,GenTreeStrCon ,0,GTK_LEAF|GTK_CONST)
//-----------------------------------------------------------------------------
// Unary operators (1 operand):
//-----------------------------------------------------------------------------
-GTNODE(NOT , "~" ,0,GTK_UNOP)
-GTNODE(NOP , "nop" ,0,GTK_UNOP)
-GTNODE(NEG , "unary -" ,0,GTK_UNOP)
-GTNODE(COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies
- // code generation constraints. The child is the actual lclVar node.
-GTNODE(RELOAD , "reload" ,0,GTK_UNOP)
-GTNODE(CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored.
- // Changing to unary presently causes problems, though -- take a little work to fix.
-
-GTNODE(ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length
-
-GTNODE(INTRINSIC , "intrinsic" ,0,GTK_BINOP|GTK_EXOP) // intrinsics
-
-GTNODE(LOCKADD , "lockAdd" ,0,GTK_BINOP|GTK_NOVALUE)
-GTNODE(XADD , "XAdd" ,0,GTK_BINOP)
-GTNODE(XCHG , "Xchg" ,0,GTK_BINOP)
-GTNODE(CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL)
-GTNODE(MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF|GTK_NOVALUE)
-
-GTNODE(CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type
-GTNODE(CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN
-GTNODE(LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca()
-GTNODE(JMP , "jump" ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function
-
-
-GTNODE(ADDR , "addr" ,0,GTK_UNOP) // address of
-GTNODE(IND , "indir" ,0,GTK_UNOP) // load indirection
-GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store indirection
-
- // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes
-GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check
-GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
-GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
-GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
-GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
-GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object
-GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object
-GTNODE(BOX , "box" ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR)
+GTNODE(NOT , "~" ,GenTreeOp ,0,GTK_UNOP)
+GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP)
+GTNODE(NEG , "unary -" ,GenTreeOp ,0,GTK_UNOP)
+GTNODE(COPY , "copy" ,GenTreeCopyOrReload,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies
+ // code generation constraints. The child is the actual lclVar node.
+GTNODE(RELOAD , "reload" ,GenTreeCopyOrReload,0,GTK_UNOP)
+GTNODE(CHS , "flipsign" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored.
+ // Changing to unary presently causes problems, though -- take a little work to fix.
+
+GTNODE(ARR_LENGTH , "arrLen" ,GenTreeArrLen ,0,GTK_UNOP|GTK_EXOP) // array-length
+
+GTNODE(INTRINSIC , "intrinsic" ,GenTreeIntrinsic ,0,GTK_BINOP|GTK_EXOP) // intrinsics
+
+GTNODE(LOCKADD , "lockAdd" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE)
+GTNODE(XADD , "XAdd" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(XCHG , "Xchg" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(CMPXCHG , "cmpxchg" ,GenTreeCmpXchg ,0,GTK_SPECIAL)
+GTNODE(MEMORYBARRIER , "memoryBarrier",GenTree ,0,GTK_LEAF|GTK_NOVALUE)
+
+GTNODE(CAST , "cast" ,GenTreeCast ,0,GTK_UNOP|GTK_EXOP) // conversion to another type
+GTNODE(CKFINITE , "ckfinite" ,GenTreeOp ,0,GTK_UNOP) // Check for NaN
+GTNODE(LCLHEAP , "lclHeap" ,GenTreeOp ,0,GTK_UNOP) // alloca()
+GTNODE(JMP , "jump" ,GenTreeVal ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function
+
+GTNODE(ADDR , "addr" ,GenTreeOp ,0,GTK_UNOP) // address of
+GTNODE(IND , "indir" ,GenTreeOp ,0,GTK_UNOP) // load indirection
+GTNODE(STOREIND , "storeIndir" ,GenTreeStoreInd ,0,GTK_BINOP|GTK_NOVALUE) // store indirection
+
+ // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes
+GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,GenTreeBoundsChk ,0,GTK_SPECIAL|GTK_NOVALUE)// array bounds check
+GTNODE(OBJ , "obj" ,GenTreeObj ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(STORE_OBJ , "storeObj" ,GenTreeBlk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(BLK , "blk" ,GenTreeBlk ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(STORE_BLK , "storeBlk" ,GenTreeBlk ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(DYN_BLK , "DynBlk" ,GenTreeBlk ,0,GTK_SPECIAL) // Dynamically sized block object
+GTNODE(STORE_DYN_BLK , "storeDynBlk" ,GenTreeBlk ,0,GTK_SPECIAL|GTK_NOVALUE)// Dynamically sized block object
+GTNODE(BOX , "box" ,GenTreeBox ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR)
#ifdef FEATURE_SIMD
-GTNODE(SIMD_CHK , "simdChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not.
- // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll
- // need VM assistance for that.
- // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that
- // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time...
+GTNODE(SIMD_CHK , "simdChk" ,GenTreeBoundsChk ,0,GTK_SPECIAL|GTK_NOVALUE)// Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not.
+ // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll
+ // need VM assistance for that.
+ // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that
+ // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time...
#endif // FEATURE_SIMD
-GTNODE(ALLOCOBJ , "allocObj" ,0,GTK_UNOP|GTK_EXOP) // object allocator
+GTNODE(ALLOCOBJ , "allocObj" ,GenTreeAllocObj ,0,GTK_UNOP|GTK_EXOP) // object allocator
+
+GTNODE(INIT_VAL , "initVal" ,GenTreeOp ,0,GTK_UNOP) // Initialization value for an initBlk
//-----------------------------------------------------------------------------
// Binary operators (2 operands):
//-----------------------------------------------------------------------------
-GTNODE(ADD , "+" ,1,GTK_BINOP)
-GTNODE(SUB , "-" ,0,GTK_BINOP)
-GTNODE(MUL , "*" ,1,GTK_BINOP)
-GTNODE(DIV , "/" ,0,GTK_BINOP)
-GTNODE(MOD , "%" ,0,GTK_BINOP)
+GTNODE(ADD , "+" ,GenTreeOp ,1,GTK_BINOP)
+GTNODE(SUB , "-" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(MUL , "*" ,GenTreeOp ,1,GTK_BINOP)
+GTNODE(DIV , "/" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(MOD , "%" ,GenTreeOp ,0,GTK_BINOP)
-GTNODE(UDIV , "un-/" ,0,GTK_BINOP)
-GTNODE(UMOD , "un-%" ,0,GTK_BINOP)
+GTNODE(UDIV , "un-/" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(UMOD , "un-%" ,GenTreeOp ,0,GTK_BINOP)
-GTNODE(OR , "|" ,1,GTK_BINOP|GTK_LOGOP)
-GTNODE(XOR , "^" ,1,GTK_BINOP|GTK_LOGOP)
-GTNODE(AND , "&" ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(OR , "|" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(XOR , "^" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(AND , "&" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP)
-GTNODE(LSH , "<<" ,0,GTK_BINOP)
-GTNODE(RSH , ">>" ,0,GTK_BINOP)
-GTNODE(RSZ , ">>>" ,0,GTK_BINOP)
-GTNODE(ROL , "rol" ,0,GTK_BINOP)
-GTNODE(ROR , "ror" ,0,GTK_BINOP)
-GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
+GTNODE(LSH , "<<" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(RSH , ">>" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(RSZ , ">>>" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(ROL , "rol" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(ROR , "ror" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(MULHI , "mulhi" ,GenTreeOp ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
+ // GT_MULHI is used in division by a constant (fgMorphDivByConst). We turn
+ // the div into a MULHI + some adjustments. In codegen, we only use the
+ // results of the high register, and we drop the low results.
-GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG , "=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_ADD , "+=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_SUB , "-=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MUL , "*=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_DIV , "/=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MOD , "%=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_UDIV , "/=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_UMOD , "%=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_OR , "|=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_XOR , "^=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_AND , "&=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_LSH , "<<=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSH , ">>=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSZ , ">>>=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
-GTNODE(EQ , "==" ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(NE , "!=" ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(LT , "<" ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(LE , "<=" ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(GE , ">=" ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(GT , ">" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(EQ , "==" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(NE , "!=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LT , "<" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LE , "<=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GE , ">=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GT , ">" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
-GTNODE(COMMA , "comma" ,0,GTK_BINOP|GTK_NOTLIR)
+GTNODE(COMMA , "comma" ,GenTreeOp ,0,GTK_BINOP|GTK_NOTLIR)
-GTNODE(QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
-GTNODE(COLON , "colon" ,0,GTK_BINOP|GTK_NOTLIR)
+GTNODE(QMARK , "qmark" ,GenTreeQmark ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
+GTNODE(COLON , "colon" ,GenTreeColon ,0,GTK_BINOP|GTK_NOTLIR)
-GTNODE(INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element
+GTNODE(INDEX , "[]" ,GenTreeIndex ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element
-GTNODE(MKREFANY , "mkrefany" ,0,GTK_BINOP)
+GTNODE(MKREFANY , "mkrefany" ,GenTreeOp ,0,GTK_BINOP)
-GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP)
+GTNODE(LEA , "lea" ,GenTreeAddrMode ,0,GTK_BINOP|GTK_EXOP)
#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
// A GT_LONG node simply represents the long value produced by the concatenation
// of its two (lower and upper half) operands. Some GT_LONG nodes are transient,
// during the decomposing of longs; others are handled by codegen as operands of
// nodes such as calls, returns and stores of long lclVars.
-GTNODE(LONG , "gt_long" ,0,GTK_BINOP)
-
-// The following are nodes representing the upper half of a 64-bit operation
-// that requires a carry/borrow. However, they are all named GT_XXX_HI for
-// consistency.
-GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP)
-GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP)
-GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP)
-GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP)
-GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP)
-GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP)
-GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP)
+GTNODE(LONG , "gt_long" ,GenTreeOp ,0,GTK_BINOP)
+
+// The following are nodes representing x86 specific long operators, including
+// high operators of 64-bit operations that require a carry/borrow, which are
+// named GT_XXX_HI for consistency, low operators of 64-bit operations that need
+// to not be modified in phases post-decompose, and operators that return 64-bit
+// results in one instruction.
+GTNODE(ADD_LO , "+Lo" ,GenTreeOp ,1,GTK_BINOP)
+GTNODE(ADD_HI , "+Hi" ,GenTreeOp ,1,GTK_BINOP)
+GTNODE(SUB_LO , "-Lo" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(SUB_HI , "-Hi" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(DIV_HI , "/Hi" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(MOD_HI , "%Hi" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(MUL_LONG , "*long" ,GenTreeOp ,1,GTK_BINOP) // A mul that returns the 2N bit result of an NxN multiply. This op
+ // is used for x86 multiplies that take two ints and return a long
+ // result. All other multiplies with long results are morphed into
+ // helper calls. It is similar to GT_MULHI, the difference being that
+ // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps
+ // both parts of the result.
+
+// The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG
+// contains the hi and lo parts of a three-operand shift form where one op will be
+// shifted into the other op as part of the operation (LSH_HI will shift
+// the high bits of the lo operand into the high operand as it shifts left. RSH_LO
+// will shift the lo bits of the high operand into the lo operand). LSH_HI
+// represents the high operation of a 64-bit left shift by a constant int, and
+// RSH_LO represents the lo operation of a 64-bit right shift by a constant int.
+GTNODE(LSH_HI , "<<Hi" ,GenTreeOp ,0,GTK_BINOP)
+GTNODE(RSH_LO , ">>Lo" ,GenTreeOp ,0,GTK_BINOP)
#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
#ifdef FEATURE_SIMD
-GTNODE(SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics
+GTNODE(SIMD , "simd" ,GenTreeSIMD ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics
#endif // FEATURE_SIMD
//-----------------------------------------------------------------------------
// Other nodes that look like unary/binary operators:
//-----------------------------------------------------------------------------
-GTNODE(JTRUE , "jmpTrue" ,0,GTK_UNOP|GTK_NOVALUE)
+GTNODE(JTRUE , "jmpTrue" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE)
+GTNODE(JCC , "jcc" ,GenTreeJumpCC ,0,GTK_LEAF|GTK_NOVALUE)
-GTNODE(LIST , "<list>" ,0,GTK_BINOP)
+GTNODE(LIST , "<list>" ,GenTreeArgList ,0,GTK_BINOP|GTK_NOVALUE)
+GTNODE(FIELD_LIST , "<fldList>" ,GenTreeFieldList ,0,GTK_BINOP) // List of fields of a struct, when passed as an argument
//-----------------------------------------------------------------------------
// Other nodes that have special structure:
//-----------------------------------------------------------------------------
-GTNODE(FIELD , "field" ,0,GTK_SPECIAL) // Member-field
-GTNODE(ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address
-GTNODE(ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element
-GTNODE(ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element
-GTNODE(CALL , "call()" ,0,GTK_SPECIAL)
+GTNODE(FIELD , "field" ,GenTreeField ,0,GTK_SPECIAL) // Member-field
+GTNODE(ARR_ELEM , "arrMD&" ,GenTreeArrElem ,0,GTK_SPECIAL) // Multi-dimensional array-element address
+GTNODE(ARR_INDEX , "arrMDIdx" ,GenTreeArrIndex ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element
+GTNODE(ARR_OFFSET , "arrMDOffs" ,GenTreeArrOffs ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element
+GTNODE(CALL , "call()" ,GenTreeCall ,0,GTK_SPECIAL)
//-----------------------------------------------------------------------------
// Statement operator nodes:
//-----------------------------------------------------------------------------
-GTNODE(BEG_STMTS , "begStmts" ,0,GTK_SPECIAL|GTK_NOVALUE) // used only temporarily in importer by impBegin/EndTreeList()
-GTNODE(STMT , "stmtExpr" ,0,GTK_SPECIAL|GTK_NOVALUE) // top-level list nodes in bbTreeList
+GTNODE(BEG_STMTS , "begStmts" ,GenTree ,0,GTK_SPECIAL|GTK_NOVALUE)// used only temporarily in importer by impBegin/EndTreeList()
+GTNODE(STMT , "stmtExpr" ,GenTreeStmt ,0,GTK_SPECIAL|GTK_NOVALUE)// top-level list nodes in bbTreeList
-GTNODE(RETURN , "return" ,0,GTK_UNOP|GTK_NOVALUE) // return from current function
-GTNODE(SWITCH , "switch" ,0,GTK_UNOP|GTK_NOVALUE) // switch
+GTNODE(RETURN , "return" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // return from current function
+GTNODE(SWITCH , "switch" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // switch
-GTNODE(NO_OP , "no_op" ,0,GTK_LEAF|GTK_NOVALUE) // nop!
+GTNODE(NO_OP , "no_op" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // nop!
-GTNODE(START_NONGC, "start_nongc",0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible
+GTNODE(START_NONGC , "start_nongc" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible
-GTNODE(PROF_HOOK , "prof_hook" ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook
+GTNODE(PROF_HOOK , "prof_hook" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook
-GTNODE(RETFILT , "retfilt", 0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value
+GTNODE(RETFILT , "retfilt" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value
#if !FEATURE_EH_FUNCLETS
-GTNODE(END_LFIN , "endLFin" ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally
+GTNODE(END_LFIN , "endLFin" ,GenTreeVal ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally
#endif // !FEATURE_EH_FUNCLETS
//-----------------------------------------------------------------------------
// Nodes used for optimizations.
//-----------------------------------------------------------------------------
-GTNODE(PHI , "phi" ,0,GTK_UNOP) // phi node for ssa.
-GTNODE(PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg)
+GTNODE(PHI , "phi" ,GenTreeOp ,0,GTK_UNOP) // phi node for ssa.
+GTNODE(PHI_ARG , "phiArg" ,GenTreePhiArg ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg)
//-----------------------------------------------------------------------------
// Nodes used by Lower to generate a closer CPU representation of other nodes
//-----------------------------------------------------------------------------
-GTNODE(JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches
-GTNODE(SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct
+#ifndef LEGACY_BACKEND
+GTNODE(JMPTABLE , "jumpTable" ,GenTreeJumpTable ,0, GTK_LEAF) // Generates the jump table for switches
+#endif
+GTNODE(SWITCH_TABLE , "tableSwitch" ,GenTreeOp ,0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct
//-----------------------------------------------------------------------------
// Nodes used only within the code generator:
//-----------------------------------------------------------------------------
-GTNODE(REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable
-GTNODE(CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member
-GTNODE(CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address
-GTNODE(STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF|GTK_NOVALUE) // store to static data member
-GTNODE(ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg
-GTNODE(NULLCHECK , "nullcheck" ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source
-GTNODE(PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register
-GTNODE(PHYSREGDST , "physregDst" ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register
-GTNODE(EMITNOP , "emitnop" ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop
-GTNODE(PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
-GTNODE(PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
-GTNODE(PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register
-GTNODE(PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack
-GTNODE(RETURNTRAP , "returnTrap" ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
-GTNODE(SWAP , "swap" ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
-GTNODE(IL_OFFSET , "il_offset" ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
+GTNODE(REG_VAR , "regVar" ,GenTreeLclVar ,0,GTK_LEAF|GTK_LOCAL) // register variable
+GTNODE(CLS_VAR , "clsVar" ,GenTreeClsVar ,0,GTK_LEAF) // static data member
+GTNODE(CLS_VAR_ADDR , "&clsVar" ,GenTreeClsVar ,0,GTK_LEAF) // static data member address
+GTNODE(ARGPLACE , "argPlace" ,GenTreeArgPlace ,0,GTK_LEAF) // placeholder for a register arg
+GTNODE(NULLCHECK , "nullcheck" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source
+GTNODE(PHYSREG , "physregSrc" ,GenTreePhysReg ,0,GTK_LEAF) // read from a physical register
+GTNODE(PHYSREGDST , "physregDst" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register
+GTNODE(EMITNOP , "emitnop" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop
+GTNODE(PINVOKE_PROLOG ,"pinvoke_prolog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
+GTNODE(PINVOKE_EPILOG ,"pinvoke_epilog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
+GTNODE(PUTARG_REG , "putarg_reg" ,GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register
+GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP) // operator that places outgoing arg in stack
+GTNODE(RETURNTRAP , "returnTrap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
+GTNODE(SWAP , "swap" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
+GTNODE(IL_OFFSET , "il_offset" ,GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
/*****************************************************************************/
#undef GTNODE
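
The gtlist.h change above adds a struct-flavor column to every GTNODE entry, which lets consumers of the table derive per-oper node sizes (for example s_gtTrueSizes) by redefining the macro. The sketch below shows that X-macro pattern on a made-up three-entry table; the structs and sizes are placeholders, not the real node list.

#include <cstddef>
#include <cstdio>

// Dummy stand-ins for the node structs named in the table.
struct GenTree       { int pad[2]; };
struct GenTreeOp     { int pad[4]; };
struct GenTreeIntCon { int pad[6]; };

// A tiny node list in the same shape as the GTNODE table:
// enum name, printable name, struct flavor, commutative, kind flags.
#define DEMO_NODELIST                                    \
    GTNODE(NOP     , "nop"   , GenTree       , 0, 0)     \
    GTNODE(ADD     , "+"     , GenTreeOp     , 1, 0)     \
    GTNODE(CNS_INT , "const" , GenTreeIntCon , 0, 0)

// First expansion: build the oper enum.
enum DemoOper
{
#define GTNODE(en, nm, st, cm, ok) DEMO_##en,
    DEMO_NODELIST
#undef GTNODE
    DEMO_COUNT
};

// Second expansion: per-oper name table.
static const char* const demoNames[] = {
#define GTNODE(en, nm, st, cm, ok) nm,
    DEMO_NODELIST
#undef GTNODE
};

// Third expansion: per-oper "true" node size, which is what the new
// struct-flavor column makes possible.
static const std::size_t demoSizes[] = {
#define GTNODE(en, nm, st, cm, ok) sizeof(st),
    DEMO_NODELIST
#undef GTNODE
};

int main()
{
    for (int i = 0; i < DEMO_COUNT; i++)
    {
        std::printf("%-7s needs %zu bytes\n", demoNames[i], demoSizes[i]);
    }
    return 0;
}
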
diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h
index 895d3b6598..ac912407be 100644
--- a/src/jit/gtstructs.h
+++ b/src/jit/gtstructs.h
@@ -65,7 +65,8 @@ GTSTRUCT_1(Cast , GT_CAST)
GTSTRUCT_1(Box , GT_BOX)
GTSTRUCT_1(Field , GT_FIELD)
GTSTRUCT_1(Call , GT_CALL)
-GTSTRUCT_1(ArgList , GT_LIST)
+GTSTRUCT_2(ArgList , GT_LIST, GT_FIELD_LIST)
+GTSTRUCT_1(FieldList , GT_FIELD_LIST)
GTSTRUCT_1(Colon , GT_COLON)
GTSTRUCT_1(FptrVal , GT_FTN_ADDR)
GTSTRUCT_1(Intrinsic , GT_INTRINSIC)
@@ -100,6 +101,7 @@ GTSTRUCT_1(PhysReg , GT_PHYSREG)
GTSTRUCT_1(SIMD , GT_SIMD)
#endif // FEATURE_SIMD
GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
+GTSTRUCT_1(JumpCC , GT_JCC)
/*****************************************************************************/
#undef GTSTRUCT_0
#undef GTSTRUCT_1
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index d04ded78fa..cb09ff8b8c 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -63,15 +63,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void Compiler::impInit()
{
-#ifdef DEBUG
- impTreeList = impTreeLast = nullptr;
-#endif
-#if defined(DEBUG)
+#ifdef DEBUG
+ impTreeList = nullptr;
+ impTreeLast = nullptr;
impInlinedCodeSize = 0;
#endif
-
- seenConditionalJump = false;
}
/*****************************************************************************
@@ -600,13 +597,9 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel)
// Assignment to (unaliased) locals don't count as a side-effect as
// we handle them specially using impSpillLclRefs(). Temp locals should
// be fine too.
- // TODO-1stClassStructs: The check below should apply equally to struct assignments,
- // but previously the block ops were always being marked GTF_GLOB_REF, even if
- // the operands could not be global refs.
if ((expr->gtOper == GT_ASG) && (expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
- !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2) &&
- !varTypeIsStruct(expr->gtOp.gtOp1))
+ !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2))
{
unsigned op2Flags = expr->gtOp.gtOp2->gtFlags & GTF_GLOB_EFFECT;
assert(flags == (op2Flags | GTF_ASG));
@@ -673,8 +666,6 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel)
impMarkContiguousSIMDFieldAssignments(stmt);
#endif
-#ifdef DEBUGGING_SUPPORT
-
/* Once we set impCurStmtOffs in an appended tree, we are ready to
report the following offsets. So reset impCurStmtOffs */
@@ -683,8 +674,6 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel)
impCurStmtOffsSet(BAD_IL_OFFSET);
}
-#endif
-
#ifdef DEBUG
if (impLastILoffsStmt == nullptr)
{
@@ -1143,9 +1132,13 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
if (destAddr->OperGet() == GT_ADDR)
{
GenTree* destNode = destAddr->gtGetOp1();
- // If the actual destination is already a block node, or is a node that
+ // If the actual destination is a local (for non-LEGACY_BACKEND), or already a block node, or is a node that
// will be morphed, don't insert an OBJ(ADDR).
- if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk())
+ if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk()
+#ifndef LEGACY_BACKEND
+ || ((destNode->OperGet() == GT_LCL_VAR) && (destNode->TypeGet() == src->TypeGet()))
+#endif // !LEGACY_BACKEND
+ )
{
dest = destNode;
}
@@ -1194,6 +1187,9 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
{
// Mark the struct LclVar as used in a MultiReg return context
// which currently makes it non promotable.
+ // TODO-1stClassStructs: Eliminate this pessimization when we can more generally
+ // handle multireg returns.
+ lcl->gtFlags |= GTF_DONT_CSE;
lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
}
else // The call result is not a multireg return
@@ -1208,12 +1204,20 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
dest = lcl;
#if defined(_TARGET_ARM_)
+ // TODO-Cleanup: This should have been taken care of in the above HasMultiRegRetVal() case,
+ // but that method has not been updated to include ARM.
impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+ lcl->gtFlags |= GTF_DONT_CSE;
#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
// Make the struct non promotable. The eightbytes could contain multiple fields.
+ // TODO-1stClassStructs: Eliminate this pessimization when we can more generally
+ // handle multireg returns.
+ // TODO-Cleanup: Why is this needed here? It seems that this will be set even for
+ // non-multireg returns.
+ lcl->gtFlags |= GTF_DONT_CSE;
lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
#endif
}
@@ -1255,10 +1259,11 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
src->gtType = genActualType(returnType);
call->gtType = src->gtType;
- // 1stClassStructToDo: We shouldn't necessarily need this.
- if (dest != nullptr)
+ // If we've changed the type, and it no longer matches a local destination,
+ // we must use an indirection.
+ if ((dest != nullptr) && (dest->OperGet() == GT_LCL_VAR) && (dest->TypeGet() != asgType))
{
- dest = gtNewOperNode(GT_IND, returnType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ dest = nullptr;
}
// !!! The destination could be on stack. !!!
@@ -1329,21 +1334,19 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
}
else if (src->IsLocal())
{
- // TODO-1stClassStructs: Eliminate this; it is only here to minimize diffs in the
- // initial implementation. Previously the source would have been under a GT_ADDR, which
- // would cause it to be marked GTF_DONT_CSE.
asgType = src->TypeGet();
- src->gtFlags |= GTF_DONT_CSE;
- if (asgType == TYP_STRUCT)
- {
- GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
- src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
- }
}
else if (asgType == TYP_STRUCT)
{
asgType = impNormStructType(structHnd);
src->gtType = asgType;
+#ifdef LEGACY_BACKEND
+ if (asgType == TYP_STRUCT)
+ {
+ GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
+ }
+#endif
}
if (dest == nullptr)
{
@@ -1459,6 +1462,8 @@ GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal,
// into which the gcLayout will be written.
// pNumGCVars - (optional, default nullptr) - if non-null, a pointer to an unsigned,
// which will be set to the number of GC fields in the struct.
+// pSimdBaseType - (optional, default nullptr) - if non-null and the struct is a SIMD
+// type, will be set to the SIMD base type
//
// Return Value:
// The JIT type for the struct (e.g. TYP_STRUCT, or TYP_SIMD*).
@@ -1480,53 +1485,69 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
var_types* pSimdBaseType)
{
assert(structHnd != NO_CLASS_HANDLE);
- unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
- unsigned numGCVars = 0;
- var_types structType = TYP_STRUCT;
- var_types simdBaseType = TYP_UNKNOWN;
- bool definitelyHasGCPtrs = false;
-#ifdef FEATURE_SIMD
- // We don't want to consider this as a possible SIMD type if it has GC pointers.
- // (Saves querying about the SIMD assembly.)
- BYTE gcBytes[maxPossibleSIMDStructBytes / TARGET_POINTER_SIZE];
- if ((gcLayout == nullptr) && (originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes()))
- {
- gcLayout = gcBytes;
- }
-#endif // FEATURE_SIMD
+ const DWORD structFlags = info.compCompHnd->getClassAttribs(structHnd);
+ var_types structType = TYP_STRUCT;
+
+#ifdef FEATURE_CORECLR
+ const bool hasGCPtrs = (structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0;
+#else
+ // Desktop CLR won't report FLG_CONTAINS_GC_PTR for RefAnyClass - need to check explicitly.
+ const bool isRefAny = (structHnd == impGetRefAnyClass());
+ const bool hasGCPtrs = isRefAny || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0);
+#endif
- if (gcLayout != nullptr)
- {
- numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout);
- definitelyHasGCPtrs = (numGCVars != 0);
- }
#ifdef FEATURE_SIMD
// Check to see if this is a SIMD type.
- if (featureSIMD && (originalSize <= getSIMDVectorRegisterByteLength()) && (originalSize >= TARGET_POINTER_SIZE) &&
- !definitelyHasGCPtrs)
+ if (featureSIMD && !hasGCPtrs)
{
- unsigned int sizeBytes;
- simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes);
- if (simdBaseType != TYP_UNKNOWN)
+ unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
+
+ if ((originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes()))
{
- assert(sizeBytes == originalSize);
- structType = getSIMDTypeForSize(sizeBytes);
- if (pSimdBaseType != nullptr)
+ unsigned int sizeBytes;
+ var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes);
+ if (simdBaseType != TYP_UNKNOWN)
{
- *pSimdBaseType = simdBaseType;
- }
+ assert(sizeBytes == originalSize);
+ structType = getSIMDTypeForSize(sizeBytes);
+ if (pSimdBaseType != nullptr)
+ {
+ *pSimdBaseType = simdBaseType;
+ }
#ifdef _TARGET_AMD64_
- // Amd64: also indicate that we use floating point registers
- compFloatingPointUsed = true;
+ // Amd64: also indicate that we use floating point registers
+ compFloatingPointUsed = true;
#endif
+ }
}
}
#endif // FEATURE_SIMD
- if (pNumGCVars != nullptr)
+
+ // Fetch GC layout info if requested
+ if (gcLayout != nullptr)
+ {
+ unsigned numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout);
+
+ // Verify that the quick test up above via the class attributes gave a
+ // safe view of the type's GCness.
+ //
+ // Note there are cases where hasGCPtrs is true but getClassGClayout
+ // does not report any gc fields.
+ assert(hasGCPtrs || (numGCVars == 0));
+
+ if (pNumGCVars != nullptr)
+ {
+ *pNumGCVars = numGCVars;
+ }
+ }
+ else
{
- *pNumGCVars = numGCVars;
+ // Can't safely ask for number of GC pointers without also
+ // asking for layout.
+ assert(pNumGCVars == nullptr);
}
+
return structType;
}
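A caller-side sketch of the contract above (illustrative only: kMaxGCSlots and the surrounding locals are stand-ins, not names from the sources). The rule the new assert enforces is that pNumGCVars may only be requested when a gcLayout buffer is also supplied.

    // Sketch: normalize a struct type, requesting GC layout, GC field count,
    // and (when the type is a SIMD type) its base type in one call.
    const unsigned kMaxGCSlots = 64; // hypothetical bound, for illustration only
    BYTE           gcLayout[kMaxGCSlots];
    unsigned       numGCVars    = 0;
    var_types      simdBaseType = TYP_UNKNOWN;
    var_types      structType   = impNormStructType(structHnd, gcLayout, &numGCVars, &simdBaseType);
    // Passing a non-null pNumGCVars with a null gcLayout would now hit the assert above.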
@@ -1777,15 +1798,19 @@ GenTreePtr Compiler::impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup,
unsigned handleFlags,
void* compileTimeHandle)
{
- CORINFO_GENERIC_HANDLE handle = 0;
- void* pIndirection = 0;
+ CORINFO_GENERIC_HANDLE handle = nullptr;
+ void* pIndirection = nullptr;
assert(pLookup->accessType != IAT_PPVALUE);
if (pLookup->accessType == IAT_VALUE)
+ {
handle = pLookup->handle;
+ }
else if (pLookup->accessType == IAT_PVALUE)
+ {
pIndirection = pLookup->addr;
- return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, 0, compileTimeHandle);
+ }
+ return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, nullptr, compileTimeHandle);
}
GenTreePtr Compiler::impReadyToRunHelperToTree(
@@ -1798,7 +1823,9 @@ GenTreePtr Compiler::impReadyToRunHelperToTree(
CORINFO_CONST_LOOKUP lookup;
#if COR_JIT_EE_VERSION > 460
if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup))
- return NULL;
+ {
+ return nullptr;
+ }
#else
info.compCompHnd->getReadyToRunHelper(pResolvedToken, helper, &lookup);
#endif
@@ -1828,7 +1855,9 @@ GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CO
*op1->gtFptrVal.gtLdftnResolvedToken = *pResolvedToken;
}
else
+ {
op1->gtFptrVal.gtEntryPoint.addr = nullptr;
+ }
#endif
break;
@@ -1852,6 +1881,46 @@ GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CO
return op1;
}
+//------------------------------------------------------------------------
+// getRuntimeContextTree: find pointer to context for runtime lookup.
+//
+// Arguments:
+// kind - lookup kind.
+//
+// Return Value:
+// GenTree pointer to the generic shared context.
+//
+// Notes:
+// Records that the generic context is used (sets lvaGenericsContextUsed).
+
+GenTreePtr Compiler::getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind)
+{
+ GenTreePtr ctxTree = nullptr;
+
+ // Collectible types require that, for shared generic code, if we use the generic context parameter,
+ // we report it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where we don't need the eager reporting logic.)
+ lvaGenericsContextUsed = true;
+
+ if (kind == CORINFO_LOOKUP_THISOBJ)
+ {
+ // this Object
+ ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ // Vtable pointer of this object
+ ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree);
+ ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ ctxTree->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM);
+
+ ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg
+ }
+ return ctxTree;
+}
+
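As a usage sketch, the runtime-lookup paths later in this change build the context tree and pass it as the single argument of a ReadyToRun helper; condensed (mirroring the ldvirtftn hunk below, so the names come from that code):

    GenTreePtr ctxTree = getRuntimeContextTree(pCallInfo->codePointerLookup.lookupKind.runtimeLookupKind);
    GenTreePtr handle  =
        impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL,
                                  gtNewArgList(ctxTree), &pCallInfo->codePointerLookup.lookupKind);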
/*****************************************************************************/
/* Import a dictionary lookup to access a handle in code shared between
generic instantiations.
@@ -1874,36 +1943,12 @@ GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedTok
CORINFO_LOOKUP* pLookup,
void* compileTimeHandle)
{
- CORINFO_RUNTIME_LOOKUP_KIND kind = pLookup->lookupKind.runtimeLookupKind;
- CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup;
// This method can only be called from the importer instance of the Compiler.
// In other words, it cannot be called by the instance of the Compiler for the inlinee.
assert(!compIsForInlining());
- GenTreePtr ctxTree;
-
- // Collectible types requires that for shared generic code, if we use the generic context parameter
- // that we report it. (This is a conservative approach, we could detect some cases particularly when the
- // context parameter is this that we don't need the eager reporting logic.)
- lvaGenericsContextUsed = true;
-
- if (kind == CORINFO_LOOKUP_THISOBJ)
- {
- // this Object
- ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF);
-
- // Vtable pointer of this object
- ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree);
- ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
- ctxTree->gtFlags |= GTF_IND_INVARIANT;
- }
- else
- {
- assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM);
-
- ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg
- }
+ GenTreePtr ctxTree = getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind);
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
@@ -1913,6 +1958,7 @@ GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedTok
}
#endif
+ CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup;
// It's available only via the run-time helper function
if (pRuntimeLookup->indirections == CORINFO_USEHELPER)
{
@@ -2083,8 +2129,6 @@ bool Compiler::impSpillStackEntry(unsigned level,
guard.Init(&impNestedStackSpill, bAssertOnRecursion);
#endif
- assert(!fgGlobalMorph); // use impInlineSpillStackEntry() during inlining
-
GenTreePtr tree = verCurrentState.esStack[level].val;
/* Allocate a temp if we haven't been asked to use a particular one */
@@ -2179,8 +2223,6 @@ void Compiler::impSpillStackEnsure(bool spillLeaves)
void Compiler::impSpillEvalStack()
{
- assert(!fgGlobalMorph); // use impInlineSpillEvalStack() during inlining
-
for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
{
impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillEvalStack"));
@@ -2318,8 +2360,6 @@ Compiler::fgWalkResult Compiler::impFindValueClasses(GenTreePtr* pTree, fgWalkDa
void Compiler::impSpillLclRefs(ssize_t lclNum)
{
- assert(!fgGlobalMorph); // use impInlineSpillLclRefs() during inlining
-
/* Before we make any appends to the tree list we must spill the
* "special" side effects (GTF_ORDER_SIDEEFF) - GT_CATCH_ARG */
@@ -2676,7 +2716,6 @@ static inline bool impOpcodeIsCallOpcode(OPCODE opcode)
}
/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode)
{
@@ -2695,8 +2734,6 @@ static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode)
}
}
-#endif // DEBUGGING_SUPPORT
-
/*****************************************************************************/
// One might think it is worth caching these values, but results indicate
@@ -2816,27 +2853,6 @@ GenTreePtr Compiler::impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp)
return tree;
}
-/*****************************************************************************/
-BOOL Compiler::impLocAllocOnStack()
-{
- if (!compLocallocUsed)
- {
- return (FALSE);
- }
-
- // Returns true if a GT_LCLHEAP node is encountered in any of the trees
- // that have been pushed on the importer evaluatuion stack.
- //
- for (unsigned i = 0; i < verCurrentState.esStackDepth; i++)
- {
- if (fgWalkTreePre(&verCurrentState.esStack[i].val, Compiler::fgChkLocAllocCB) == WALK_ABORT)
- {
- return (TRUE);
- }
- }
- return (FALSE);
-}
-
//------------------------------------------------------------------------
// impInitializeArrayIntrinsic: Attempts to replace a call to InitializeArray
// with a GT_COPYBLK node.
@@ -3236,7 +3252,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
#if COR_JIT_EE_VERSION > 460
CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
#else
- CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
#endif
*pIntrinsicID = intrinsicID;
@@ -3307,9 +3323,9 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
op1 = nullptr;
-#ifdef LEGACY_BACKEND
+#if defined(LEGACY_BACKEND)
if (IsTargetIntrinsic(intrinsicID))
-#else
+#elif !defined(_TARGET_X86_)
// Intrinsics that are not implemented directly by target instructions will
// be re-materialized as users calls in rationalizer. For prefixed tail calls,
// don't do this optimization, because
@@ -3317,6 +3333,11 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
// b) It will be non-trivial task or too late to re-materialize a surviving
// tail prefixed GT_INTRINSIC as tail call in rationalizer.
if (!IsIntrinsicImplementedByUserCall(intrinsicID) || !tailCall)
+#else
+ // On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation
+ // of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad
+ // code generation for certain EH constructs.
+ if (!IsIntrinsicImplementedByUserCall(intrinsicID))
#endif
{
switch (sig->numArgs)
@@ -3534,7 +3555,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
// Get native TypeHandle argument to old helper
op1 = op1->gtCall.gtCallArgs;
- assert(op1->IsList());
+ assert(op1->OperIsList());
assert(op1->gtOp.gtOp2 == nullptr);
op1 = op1->gtOp.gtOp1;
retNode = op1;
@@ -3886,7 +3907,7 @@ void Compiler::verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logM
#endif // DEBUG
// Add the non verifiable flag to the compiler
- if ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY))
{
tiIsVerifiableCode = FALSE;
}
@@ -4913,14 +4934,26 @@ GenTreePtr Compiler::impImportLdvirtftn(GenTreePtr thisPtr,
}
#ifdef FEATURE_READYTORUN_COMPILER
- if (opts.IsReadyToRun() && !pCallInfo->exactContextNeedsRuntimeLookup)
+ if (opts.IsReadyToRun())
{
- GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT,
- gtNewArgList(thisPtr));
+ if (!pCallInfo->exactContextNeedsRuntimeLookup)
+ {
+ GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT,
+ gtNewArgList(thisPtr));
- call->setEntryPoint(pCallInfo->codePointerLookup.constLookup);
+ call->setEntryPoint(pCallInfo->codePointerLookup.constLookup);
- return call;
+ return call;
+ }
+
+ // We need a runtime lookup. CoreRT has a ReadyToRun helper for that too.
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ GenTreePtr ctxTree = getRuntimeContextTree(pCallInfo->codePointerLookup.lookupKind.runtimeLookupKind);
+
+ return impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL,
+ gtNewArgList(ctxTree), &pCallInfo->codePointerLookup.lookupKind);
+ }
}
#endif
@@ -5001,7 +5034,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
if (opts.IsReadyToRun())
{
op1 = impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
- usingReadyToRunHelper = (op1 != NULL);
+ usingReadyToRunHelper = (op1 != nullptr);
}
if (!usingReadyToRunHelper)
@@ -5150,7 +5183,7 @@ void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI
CLANG_FORMAT_COMMENT_ANCHOR;
#if COR_JIT_EE_VERSION > 460
- if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI))
+ if (!opts.IsReadyToRun() || IsTargetAbi(CORINFO_CORERT_ABI))
{
LclVarDsc* newObjArrayArgsVar;
@@ -5325,61 +5358,110 @@ GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr,
}
}
-bool Compiler::impCanPInvokeInline(var_types callRetTyp)
+//------------------------------------------------------------------------
+// impCanPInvokeInline: examine information from a call to see if the call
+// qualifies as an inline pinvoke.
+//
+// Arguments:
+// block - block containing the call, or for inlinees, block
+// containing the call being inlined
+//
+// Return Value:
+// true if this call qualifies as an inline pinvoke, false otherwise
+//
+// Notes:
+// Checks basic legality and then a number of ambient conditions
+// where we could pinvoke but choose not to
+
+bool Compiler::impCanPInvokeInline(BasicBlock* block)
{
- return impCanPInvokeInlineCallSite(callRetTyp) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
+ return impCanPInvokeInlineCallSite(block) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
(compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
;
}
-// Returns false only if the callsite really cannot be inlined. Ignores global variables
-// like debugger, profiler etc.
-bool Compiler::impCanPInvokeInlineCallSite(var_types callRetTyp)
+//------------------------------------------------------------------------
+// impCanPInvokeInlineCallSite: basic legality checks using information
+// from a call to see if the call qualifies as an inline pinvoke.
+//
+// Arguments:
+// block - block containing the call, or for inlinees, block
+// containing the call being inlined
+//
+// Return Value:
+// true if this call can legally qualify as an inline pinvoke, false otherwise
+//
+// Notes:
+// For runtimes that support exception handling interop there are
+// restrictions on using inline pinvoke in handler regions.
+//
+// * We have to disable pinvoke inlining inside of filters because,
+// if the main execution (i.e. in the try block) is inside
+// unmanaged code, we cannot reuse the inlined stub (we still need
+// the original state until we are in the catch handler).
+//
+// * We disable pinvoke inlining inside handlers since the GSCookie
+// is in the inlined Frame (see
+// CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie), but
+// this would not protect framelets/return-address of handlers.
+//
+// These restrictions are currently also in place for CoreCLR but
+// can be relaxed when coreclr/#8459 is addressed.
+
+bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
{
- return
- // We have to disable pinvoke inlining inside of filters
- // because in case the main execution (i.e. in the try block) is inside
- // unmanaged code, we cannot reuse the inlined stub (we still need the
- // original state until we are in the catch handler)
- (!bbInFilterILRange(compCurBB)) &&
- // We disable pinvoke inlining inside handlers since the GSCookie is
- // in the inlined Frame (see CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie),
- // but this would not protect framelets/return-address of handlers.
- !compCurBB->hasHndIndex() &&
#ifdef _TARGET_AMD64_
- // Turns out JIT64 doesn't perform PInvoke inlining inside try regions, here's an excerpt of
- // the comment from JIT64 explaining why:
- //
- //// [VSWhidbey: 611015] - because the jitted code links in the Frame (instead
- //// of the stub) we rely on the Frame not being 'active' until inside the
- //// stub. This normally happens by the stub setting the return address
- //// pointer in the Frame object inside the stub. On a normal return, the
- //// return address pointer is zeroed out so the Frame can be safely re-used,
- //// but if an exception occurs, nobody zeros out the return address pointer.
- //// Thus if we re-used the Frame object, it would go 'active' as soon as we
- //// link it into the Frame chain.
- ////
- //// Technically we only need to disable PInvoke inlining if we're in a
- //// handler or if we're
- //// in a try body with a catch or filter/except where other non-handler code
- //// in this method might run and try to re-use the dirty Frame object.
- //
- // Now, because of this, the VM actually assumes that in 64 bit we never PInvoke
- // inline calls on any EH construct, you can verify that on VM\ExceptionHandling.cpp:203
- // The method responsible for resuming execution is UpdateObjectRefInResumeContextCallback
- // you can see how it aligns with JIT64 policy of not inlining PInvoke calls almost right
- // at the beginning of the body of the method.
- !compCurBB->hasTryIndex() &&
-#endif
- (!impLocAllocOnStack()) && (callRetTyp != TYP_STRUCT);
+ // On x64, we disable pinvoke inlining inside of try regions.
+ // Here is the comment from JIT64 explaining why:
+ //
+ // [VSWhidbey: 611015] - because the jitted code links in the
+ // Frame (instead of the stub) we rely on the Frame not being
+ // 'active' until inside the stub. This normally happens by the
+ // stub setting the return address pointer in the Frame object
+ // inside the stub. On a normal return, the return address
+ // pointer is zeroed out so the Frame can be safely re-used, but
+ // if an exception occurs, nobody zeros out the return address
+ // pointer. Thus if we re-used the Frame object, it would go
+ // 'active' as soon as we link it into the Frame chain.
+ //
+ // Technically we only need to disable PInvoke inlining if we're
+ // in a handler or if we're in a try body with a catch or
+ // filter/except where other non-handler code in this method
+ // might run and try to re-use the dirty Frame object.
+ //
+ // A desktop test case where this seems to matter is
+ // jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe
+ const bool inX64Try = block->hasTryIndex();
+#else
+ const bool inX64Try = false;
+#endif // _TARGET_AMD64_
+
+ return !inX64Try && !block->hasHndIndex();
}
-void Compiler::impCheckForPInvokeCall(GenTreePtr call,
- CORINFO_METHOD_HANDLE methHnd,
- CORINFO_SIG_INFO* sig,
- unsigned mflags)
+//------------------------------------------------------------------------
+// impCheckForPInvokeCall: examine a call to see if it is a pinvoke and, if so,
+// whether it can be expressed as an inline pinvoke.
+//
+// Arguments:
+// call - tree for the call
+// methHnd - handle for the method being called (may be null)
+// sig - signature of the method being called
+// mflags - method flags for the method being called
+// block - block containing the call, or for inlinees, block
+// containing the call being inlined
+//
+// Notes:
+// Sets GTF_CALL_M_PINVOKE on the call for pinvokes.
+//
+// Also sets GTF_CALL_UNMANAGED on call for inline pinvokes if the
+// call passes a combination of legality and profitability checks.
+//
+// If GTF_CALL_UNMANAGED is set, increments info.compCallUnmanaged
+
+void Compiler::impCheckForPInvokeCall(
+ GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block)
{
- var_types callRetTyp = JITtype2varType(sig->retType);
CorInfoUnmanagedCallConv unmanagedCallConv;
// If VM flagged it as Pinvoke, flag the call node accordingly
@@ -5422,15 +5504,12 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call,
if (opts.compMustInlinePInvokeCalli && methHnd == nullptr)
{
-#ifdef _TARGET_X86_
- // CALLI in IL stubs must be inlined
- assert(impCanPInvokeInlineCallSite(callRetTyp));
- assert(!info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig));
-#endif // _TARGET_X86_
+ // Always inline pinvoke.
}
else
{
- if (!impCanPInvokeInline(callRetTyp))
+ // Check legality and profitability.
+ if (!impCanPInvokeInline(block))
{
return;
}
@@ -5439,6 +5518,14 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call,
{
return;
}
+
+ // Size-speed tradeoff: don't use inline pinvoke at rarely
+ // executed call sites. The non-inline version is more
+ // compact.
+ if (block->isRunRarely())
+ {
+ return;
+ }
}
JITLOG((LL_INFO1000000, "\nInline a CALLI PINVOKE call from method %s", info.compFullName));
@@ -5446,8 +5533,6 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call,
call->gtFlags |= GTF_CALL_UNMANAGED;
info.compCallUnmanaged++;
- assert(!compIsForInlining());
-
// AMD64 convention is same for native and managed
if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_C)
{
@@ -5736,6 +5821,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
break;
case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+ {
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
@@ -5762,8 +5848,39 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, pFieldInfo->offset, fs));
}
break;
+ }
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ noway_assert(opts.IsReadyToRun());
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+ assert(kind.needsRuntimeLookup);
+
+ GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
+ GenTreeArgList* args = gtNewArgList(ctxTree);
+
+ unsigned callFlags = 0;
+
+ if (info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_BEFOREFIELDINIT)
+ {
+ callFlags |= GTF_CALL_HOISTABLE;
+ }
+ var_types type = TYP_BYREF;
+ op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, type, callFlags, args);
+ op1->gtCall.setEntryPoint(pFieldInfo->fieldLookup);
+ FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+ op1 = gtNewOperNode(GT_ADD, type, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, pFieldInfo->offset, fs));
+#else
+ unreached();
+#endif // FEATURE_READYTORUN_COMPILER
+ }
+ break;
+#endif // COR_JIT_EE_VERSION > 460
default:
+ {
if (!(access & CORINFO_ACCESS_ADDRESS))
{
// In future, it may be better to just create the right tree here instead of folding it later.
@@ -5820,6 +5937,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
}
}
break;
+ }
}
if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
@@ -6071,7 +6189,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly
// one pop seen so far.
#else
- nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
#endif
if (isCallPopAndRet)
@@ -6845,9 +6963,15 @@ var_types Compiler::impImportCall(OPCODE opcode,
//--------------------------- Inline NDirect ------------------------------
- if (!compIsForInlining())
+ // For inline cases we technically should look at both the current
+ // block and the call site block (or just the latter if we've
+ // fused the EH trees). However, the block-related checks pertain to
+ // EH and we currently won't inline a method with EH. So for
+ // inlinees, just checking the call site block is sufficient.
{
- impCheckForPInvokeCall(call, methHnd, sig, mflags);
+ // New lexical block here to avoid compilation errors because of GOTOs.
+ BasicBlock* block = compIsForInlining() ? impInlineInfo->iciBlock : compCurBB;
+ impCheckForPInvokeCall(call, methHnd, sig, mflags, block);
}
if (call->gtFlags & GTF_CALL_UNMANAGED)
@@ -7035,7 +7159,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
{
instParam =
impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_CLASS_HDL, exactClassHandle);
- if (instParam == NULL)
+ if (instParam == nullptr)
{
return callRetTyp;
}
@@ -7452,10 +7576,6 @@ DONE_CALL:
{
call = impFixupCallStructReturn(call, sig->retTypeClass);
}
- else if (varTypeIsLong(callRetTyp))
- {
- call = impInitCallLongReturn(call);
- }
if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
{
@@ -7467,6 +7587,13 @@ DONE_CALL:
// TODO: Still using the widened type.
call = gtNewInlineCandidateReturnExpr(call, genActualType(callRetTyp));
}
+ else
+ {
+ // For non-candidates we must also spill, since we
+ // might have locals live on the eval stack that this
+ // call can modify.
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("non-inline candidate call"));
+ }
}
if (!bIntrinsicImported)
@@ -7738,42 +7865,6 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
return call;
}
-//-------------------------------------------------------------------------------------
-// impInitCallLongReturn:
-// Initialize the ReturnTypDesc for a call that returns a TYP_LONG
-//
-// Arguments:
-// call - GT_CALL GenTree node
-//
-// Return Value:
-// Returns new GenTree node after initializing the ReturnTypeDesc of call node
-//
-GenTreePtr Compiler::impInitCallLongReturn(GenTreePtr call)
-{
- assert(call->gtOper == GT_CALL);
-
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- // LEGACY_BACKEND does not use multi reg returns for calls with long return types
-
- if (varTypeIsLong(call))
- {
- GenTreeCall* callNode = call->AsCall();
-
- // The return type will remain as the incoming long type
- callNode->gtReturnType = call->gtType;
-
- // Initialize Return type descriptor of call node
- ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
- retTypeDesc->InitializeLongReturnType(this);
-
- // must be a long returned in two registers
- assert(retTypeDesc->GetReturnRegCount() == 2);
- }
-#endif // _TARGET_X86_ && !LEGACY_BACKEND
-
- return call;
-}
-
/*****************************************************************************
For struct return values, re-type the operand in the case where the ABI
does not use a struct return buffer
@@ -7804,6 +7895,9 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL
unsigned lclNum = op->gtLclVarCommon.gtLclNum;
lvaTable[lclNum].lvIsMultiRegRet = true;
+ // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns.
+ op->gtFlags |= GTF_DONT_CSE;
+
return op;
}
@@ -7828,6 +7922,10 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL
unsigned lclNum = op->gtLclVarCommon.gtLclNum;
// Make sure this struct type stays as struct so that we can return it as an HFA
lvaTable[lclNum].lvIsMultiRegRet = true;
+
+ // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns.
+ op->gtFlags |= GTF_DONT_CSE;
+
return op;
}
@@ -7860,6 +7958,10 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL
// Make sure this struct type is not struct promoted
lvaTable[lclNum].lvIsMultiRegRet = true;
+
+ // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns.
+ op->gtFlags |= GTF_DONT_CSE;
+
return op;
}
@@ -9311,8 +9413,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
#ifndef DEBUG
if (opts.compDbgInfo)
#endif
@@ -9424,8 +9524,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
}
}
-#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
CORINFO_CLASS_HANDLE clsHnd = DUMMY_INIT(NULL);
CORINFO_CLASS_HANDLE ldelemClsHnd = DUMMY_INIT(NULL);
CORINFO_CLASS_HANDLE stelemClsHnd = DUMMY_INIT(NULL);
@@ -9515,6 +9613,14 @@ void Compiler::impImportBlockCode(BasicBlock* block)
SPILL_APPEND:
+ // We need to call impSpillLclRefs() for a struct type lclVar.
+ // This is done for non-block assignments in the handling of stloc.
+ if ((op1->OperGet() == GT_ASG) && varTypeIsStruct(op1->gtOp.gtOp1) &&
+ (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR))
+ {
+ impSpillLclRefs(op1->gtOp.gtOp1->AsLclVarCommon()->gtLclNum);
+ }
+
/* Append 'op1' to the list of statements */
impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
goto DONE_APPEND;
@@ -11087,8 +11193,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
COND_JUMP:
- seenConditionalJump = true;
-
/* Fold comparison if we can */
op1 = gtFoldExpr(op1);
@@ -12328,14 +12432,12 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// At present this can only be String
else if (clsFlags & CORINFO_FLG_VAROBJSIZE)
{
-#if COR_JIT_EE_VERSION > 460
- if (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI)
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
{
// The dummy argument does not exist in CoreRT
newObjThisPtr = nullptr;
}
else
-#endif
{
// This is the case for variable-sized objects that are not
// arrays. In this case, call the constructor with a null 'this'
@@ -12368,6 +12470,33 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// The lookup of the code pointer will be handled by CALL in this case
if (clsFlags & CORINFO_FLG_VALUECLASS)
{
+ if (compIsForInlining())
+ {
+ // If value class has GC fields, inform the inliner. It may choose to
+ // bail out on the inline.
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_HAS_GC_STRUCT);
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+
+ // Do further notification in the case where the call site is rare;
+ // some policies do not track the relative hotness of call sites for
+ // "always" inline cases.
+ if (impInlineInfo->iciBlock->isRunRarely())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_RARE_GC_STRUCT);
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+ }
+ }
+
CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
unsigned size = info.compCompHnd->getClassSize(resolvedToken.hClass);
@@ -12403,7 +12532,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
if (opts.IsReadyToRun())
{
op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
- usingReadyToRunHelper = (op1 != NULL);
+ usingReadyToRunHelper = (op1 != nullptr);
}
if (!usingReadyToRunHelper)
@@ -12503,6 +12632,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)
if (compIsForInlining())
{
+ if (compDonotInline())
+ {
+ return;
+ }
// We rule out inlinees with explicit tail calls in fgMakeBasicBlocks.
assert((prefixFlags & PREFIX_TAILCALL_EXPLICIT) == 0);
}
@@ -12696,7 +12829,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
-
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
+#endif
/* We may be able to inline the field accessors in specific instantiations of generic
* methods */
compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDFLD_NEEDS_HELPER);
@@ -12828,7 +12963,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
#ifdef FEATURE_READYTORUN_COMPILER
if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ {
op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+ }
#endif
op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
@@ -12925,6 +13062,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CORINFO_FIELD_STATIC_RVA_ADDRESS:
case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
+#endif
op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
lclTyp);
break;
@@ -13068,6 +13208,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
+#endif
/* We may be able to inline the field accessors in specific instantiations of generic
* methods */
@@ -13134,7 +13277,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
#ifdef FEATURE_READYTORUN_COMPILER
if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ {
op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+ }
#endif
op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
@@ -13185,6 +13330,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CORINFO_FIELD_STATIC_RVA_ADDRESS:
case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
+#endif
op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
lclTyp);
break;
@@ -13376,7 +13524,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
{
op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEWARR_1, TYP_REF,
gtNewArgList(op2));
- usingReadyToRunHelper = (op1 != NULL);
+ usingReadyToRunHelper = (op1 != nullptr);
if (!usingReadyToRunHelper)
{
@@ -13388,9 +13536,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
// Need to restore array classes before creating array objects on the heap
- op1 = impTokenToHandle(&resolvedToken, NULL, TRUE /*mustRestoreHandle*/);
- if (op1 == NULL) // compDonotInline()
+ op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE /*mustRestoreHandle*/);
+ if (op1 == nullptr)
+ { // compDonotInline()
return;
+ }
}
}
@@ -13498,7 +13648,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
GenTreePtr opLookup =
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
gtNewArgList(op1));
- usingReadyToRunHelper = (opLookup != NULL);
+ usingReadyToRunHelper = (opLookup != nullptr);
op1 = (usingReadyToRunHelper ? opLookup : op1);
if (!usingReadyToRunHelper)
@@ -13510,9 +13660,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// 3) Perform the 'is instance' check on the input object
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
- op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
- if (op2 == NULL) // compDonotInline()
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
return;
+ }
}
}
@@ -14026,7 +14178,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
{
GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
TYP_REF, gtNewArgList(op1));
- usingReadyToRunHelper = (opLookup != NULL);
+ usingReadyToRunHelper = (opLookup != nullptr);
op1 = (usingReadyToRunHelper ? opLookup : op1);
if (!usingReadyToRunHelper)
@@ -14038,9 +14190,11 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// 3) Check the object on the stack for the type-cast
// Reason: performance (today, we'll always use the slow helper for the R2R generics case)
- op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
- if (op2 == NULL) // compDonotInline()
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
return;
+ }
}
}
@@ -14075,20 +14229,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
compInlineResult->NoteFatal(InlineObservation::CALLEE_THROW_WITH_INVALID_STACK);
return;
}
-
- /* Don't inline non-void conditionals that have a throw in one of the branches */
-
- /* NOTE: If we do allow this, note that we can't simply do a
- checkLiveness() to match the liveness at the end of the "then"
- and "else" branches of the GT_COLON. The branch with the throw
- will keep nothing live, so we should use the liveness at the
- end of the non-throw branch. */
-
- if (seenConditionalJump && (impInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID))
- {
- compInlineResult->NoteFatal(InlineObservation::CALLSITE_CONDITIONAL_THROW);
- return;
- }
}
if (tiVerificationNeeded)
@@ -14714,6 +14854,10 @@ GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HAN
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return."));
impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE);
GenTreePtr ret = gtNewLclvNode(tmpNum, op->gtType);
+
+ // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns.
+ ret->gtFlags |= GTF_DONT_CSE;
+
assert(IsMultiRegReturnedType(hClass));
// Mark the var so that fields are not promoted and stay together.
@@ -14852,7 +14996,8 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
- assert(info.compRetNativeType != TYP_VOID && fgMoreThanOneReturnBlock());
+ assert(info.compRetNativeType != TYP_VOID &&
+ (fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals));
// This is a bit of a workaround...
// If we are inlining a call that returns a struct, where the actual "native" return type is
@@ -14943,7 +15088,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
// in this case we have to insert multiple struct copies to the temp
// and the retexpr is just the temp.
assert(info.compRetNativeType != TYP_VOID);
- assert(fgMoreThanOneReturnBlock());
+ assert(fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals);
impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
(unsigned)CHECK_SPILL_ALL);
@@ -16469,7 +16614,7 @@ void Compiler::impImport(BasicBlock* method)
// coupled with the JIT64 IL Verification logic. Look inside verHandleVerificationFailure
// method for further explanation on why we raise this exception instead of making the jitted
// code throw the verification exception during execution.
- if (tiVerificationNeeded && (opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ if (tiVerificationNeeded && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY))
{
BADCODE("Basic block marked as not verifiable");
}
@@ -16989,18 +17134,10 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo,
#endif // FEATURE_SIMD
}
- if (curArgVal->gtFlags & GTF_ORDER_SIDEEFF)
- {
- // Right now impInlineSpillLclRefs and impInlineSpillGlobEffects don't take
- // into account special side effects, so we disallow them during inlining.
- inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_SIDE_EFFECT);
- return;
- }
-
- if (curArgVal->gtFlags & GTF_GLOB_EFFECT)
+ if (curArgVal->gtFlags & GTF_ALL_EFFECT)
{
inlCurArgInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0;
- inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & GTF_SIDE_EFFECT) != 0;
+ inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & (GTF_ALL_EFFECT & ~GTF_GLOB_REF)) != 0;
}
if (curArgVal->gtOper == GT_LCL_VAR)
@@ -17251,6 +17388,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
var_types sigType = (var_types)eeGetArgType(argLst, &methInfo->args);
lclVarInfo[i].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->args, argLst);
+
#ifdef FEATURE_SIMD
if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo)))
{
@@ -17377,16 +17515,49 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
var_types type = (var_types)eeGetArgType(localsSig, &methInfo->locals, &isPinned);
lclVarInfo[i + argCnt].lclHasLdlocaOp = false;
+ lclVarInfo[i + argCnt].lclIsPinned = isPinned;
lclVarInfo[i + argCnt].lclTypeInfo = type;
if (isPinned)
{
- inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_PINNED_LOCALS);
- return;
+ // Pinned locals may cause inlines to fail.
+ inlineResult->Note(InlineObservation::CALLEE_HAS_PINNED_LOCALS);
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
}
lclVarInfo[i + argCnt].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->locals, localsSig);
+ // If this local is a struct type with GC fields, inform the inliner. It may choose to bail
+ // out on the inline.
+ if (type == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE lclHandle = lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle();
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(lclHandle);
+ if ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_HAS_GC_STRUCT);
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ // Do further notification in the case where the call site is rare; some policies do
+ // not track the relative hotness of call sites for "always" inline cases.
+ if (pInlineInfo->iciBlock->isRunRarely())
+ {
+ inlineResult->Note(InlineObservation::CALLSITE_RARE_GC_STRUCT);
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+ }
+ }
+
localsSig = info.compCompHnd->getArgNext(localsSig);
#ifdef FEATURE_SIMD
@@ -17431,6 +17602,28 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reas
lvaTable[tmpNum].lvHasLdAddrOp = 1;
}
+ if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclIsPinned)
+ {
+ lvaTable[tmpNum].lvPinned = 1;
+
+ if (!impInlineInfo->hasPinnedLocals)
+ {
+ // If the inlinee returns a value, use a spill temp
+ // for the return value to ensure that even in case
+ // where the return expression refers to one of the
+ // pinned locals, we can unpin the local right after
+ // the inlined method body.
+ if ((info.compRetNativeType != TYP_VOID) && (lvaInlineeReturnSpillTemp == BAD_VAR_NUM))
+ {
+ lvaInlineeReturnSpillTemp =
+ lvaGrabTemp(false DEBUGARG("Inline candidate pinned local return spill temp"));
+ lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
+ }
+ }
+
+ impInlineInfo->hasPinnedLocals = true;
+ }
+
if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct())
{
if (varTypeIsStruct(lclTyp))
@@ -17895,10 +18088,17 @@ void Compiler::impMarkInlineCandidate(GenTreePtr callNode,
bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId)
{
-#if defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))
switch (intrinsicId)
{
// Amd64 only has SSE2 instruction to directly compute sqrt/abs.
+ //
+ // TODO: Because the x86 backend only targets SSE for floating-point code,
+ // it does not treat Sine, Cosine, or Round as intrinsics (JIT32
+ // implemented those intrinsics as x87 instructions). If this poses
+ // a CQ problem, it may be necessary to change the implementation of
+ // the helper calls to decrease call overhead or switch back to the
+ // x87 instructions. This is tracked by #7097.
case CORINFO_INTRINSIC_Sqrt:
case CORINFO_INTRINSIC_Abs:
return true;
diff --git a/src/jit/inline.cpp b/src/jit/inline.cpp
index deccc0e84b..05fcf1c6b9 100644
--- a/src/jit/inline.cpp
+++ b/src/jit/inline.cpp
@@ -447,7 +447,7 @@ void InlineContext::DumpData(unsigned indent)
else if (m_Success)
{
const char* inlineReason = InlGetObservationString(m_Observation);
- printf("%*s%u,\"%s\",\"%s\"", indent, "", m_Ordinal, inlineReason, calleeName);
+ printf("%*s%u,\"%s\",\"%s\",", indent, "", m_Ordinal, inlineReason, calleeName);
m_Policy->DumpData(jitstdout);
printf("\n");
}
@@ -500,14 +500,25 @@ void InlineContext::DumpXml(FILE* file, unsigned indent)
fprintf(file, "%*s<Offset>%u</Offset>\n", indent + 2, "", offset);
fprintf(file, "%*s<Reason>%s</Reason>\n", indent + 2, "", inlineReason);
- // Optionally, dump data about the last inline
- if ((JitConfig.JitInlineDumpData() != 0) && (this == m_InlineStrategy->GetLastContext()))
+ // Optionally, dump data about the inline
+ const int dumpDataSetting = JitConfig.JitInlineDumpData();
+
+ // JitInlineDumpData=1 -- dump data plus deltas for last inline only
+ if ((dumpDataSetting == 1) && (this == m_InlineStrategy->GetLastContext()))
{
fprintf(file, "%*s<Data>", indent + 2, "");
m_InlineStrategy->DumpDataContents(file);
fprintf(file, "</Data>\n");
}
+ // JitInlineDumpData=2 -- dump data for all inlines, no deltas
+ if ((dumpDataSetting == 2) && (m_Policy != nullptr))
+ {
+ fprintf(file, "%*s<Data>", indent + 2, "");
+ m_Policy->DumpData(file);
+ fprintf(file, "</Data>\n");
+ }
+
newIndent = indent + 2;
}
@@ -646,10 +657,11 @@ void InlineResult::Report()
m_Reported = true;
#ifdef DEBUG
- const char* callee = nullptr;
+ const char* callee = nullptr;
+ const bool showInlines = (JitConfig.JitPrintInlinedMethods() == 1);
// Optionally dump the result
- if (VERBOSE)
+ if (VERBOSE || showInlines)
{
const char* format = "INLINER: during '%s' result '%s' reason '%s' for '%s' calling '%s'\n";
const char* caller = (m_Caller == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Caller);
@@ -689,12 +701,18 @@ void InlineResult::Report()
#ifdef DEBUG
+ const char* obsString = InlGetObservationString(obs);
+
if (VERBOSE)
{
- const char* obsString = InlGetObservationString(obs);
JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, obsString);
}
+ if (showInlines)
+ {
+ printf("Marking %s as NOINLINE because of %s\n", callee, obsString);
+ }
+
#endif // DEBUG
COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
@@ -740,6 +758,7 @@ InlineStrategy::InlineStrategy(Compiler* compiler)
, m_HasForceViaDiscretionary(false)
#if defined(DEBUG) || defined(INLINE_DATA)
, m_MethodXmlFilePosition(0)
+ , m_Random(nullptr)
#endif // defined(DEBUG) || defined(INLINE_DATA)
{
@@ -1155,10 +1174,10 @@ InlineContext* InlineStrategy::NewRoot()
InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo)
{
InlineContext* calleeContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
- GenTree* stmt = inlineInfo->iciStmt;
+ GenTreeStmt* stmt = inlineInfo->iciStmt;
BYTE* calleeIL = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
unsigned calleeILSize = inlineInfo->inlineCandidateInfo->methInfo.ILCodeSize;
- InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
+ InlineContext* parentContext = stmt->gtInlineContext;
noway_assert(parentContext != nullptr);
@@ -1213,35 +1232,22 @@ InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo)
// A new InlineContext for diagnostic purposes, or nullptr if
// the desired context could not be created.
-InlineContext* InlineStrategy::NewFailure(GenTree* stmt, InlineResult* inlineResult)
+InlineContext* InlineStrategy::NewFailure(GenTreeStmt* stmt, InlineResult* inlineResult)
{
- // Check for a parent context first. We may insert new statements
- // between the caller and callee that do not pick up either's
- // context, and these statements may have calls that we later
- // examine and fail to inline.
- //
- // See fgInlinePrependStatements for examples.
-
- InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
-
- if (parentContext == nullptr)
- {
- // Assume for now this is a failure to inline a call in a
- // statement inserted between caller and callee. Just ignore
- // it for the time being.
-
- return nullptr;
- }
-
+ // Check for a parent context first. We should now have a parent
+ // context for all statements.
+ InlineContext* parentContext = stmt->gtInlineContext;
+ assert(parentContext != nullptr);
InlineContext* failedContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
- failedContext->m_Parent = parentContext;
- // Push on front here will put siblings in reverse lexical
- // order which we undo in the dumper
+ // Pushing the new context on the front of the parent's child list
+ // will put siblings in reverse lexical order, which we undo in the
+ // dumper.
+ failedContext->m_Parent = parentContext;
failedContext->m_Sibling = parentContext->m_Child;
parentContext->m_Child = failedContext;
failedContext->m_Child = nullptr;
- failedContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx;
+ failedContext->m_Offset = stmt->gtStmtILoffsx;
failedContext->m_Observation = inlineResult->GetObservation();
failedContext->m_Callee = inlineResult->GetCallee();
failedContext->m_Success = false;
@@ -1354,7 +1360,7 @@ void InlineStrategy::DumpDataEnsurePolicyIsSet()
// successful policy, so fake one up.
if (m_LastSuccessfulPolicy == nullptr)
{
- const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
m_LastSuccessfulPolicy = InlinePolicy::GetPolicy(m_Compiler, isPrejitRoot);
// Add in a bit of data....
@@ -1388,7 +1394,7 @@ void InlineStrategy::DumpDataHeader(FILE* file)
void InlineStrategy::DumpDataSchema(FILE* file)
{
DumpDataEnsurePolicyIsSet();
- fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate");
+ fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate,");
m_LastSuccessfulPolicy->DumpSchema(file);
}
@@ -1424,7 +1430,7 @@ void InlineStrategy::DumpDataContents(FILE* file)
microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
}
- fprintf(file, "%08X,%u,%u,%u,%u,%d,%d", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize,
+ fprintf(file, "%08X,%u,%u,%u,%u,%d,%d,", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize,
info.compTotalColdCodeSize, microsecondsSpentJitting, m_CurrentSizeEstimate / 10, m_CurrentTimeEstimate);
m_LastSuccessfulPolicy->DumpData(file);
}
@@ -1461,10 +1467,22 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent)
fprintf(file, "<InlineForest>\n");
fprintf(file, "<Policy>%s</Policy>\n", m_LastSuccessfulPolicy->GetName());
- if (JitConfig.JitInlineDumpData() != 0)
+ const int dumpDataSetting = JitConfig.JitInlineDumpData();
+ if (dumpDataSetting != 0)
{
fprintf(file, "<DataSchema>");
- DumpDataSchema(file);
+
+ if (dumpDataSetting == 1)
+ {
+ // JitInlineDumpData=1 -- dump schema for data plus deltas
+ DumpDataSchema(file);
+ }
+ else if (dumpDataSetting == 2)
+ {
+ // JitInlineDumpData=2 -- dump schema for data only
+ m_LastSuccessfulPolicy->DumpSchema(file);
+ }
+
fprintf(file, "</DataSchema>\n");
}
@@ -1484,7 +1502,7 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent)
const Compiler::Info& info = m_Compiler->info;
const Compiler::Options& opts = m_Compiler->opts;
- const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
// We'd really like the method identifier to be unique and
@@ -1589,6 +1607,52 @@ void InlineStrategy::FinalizeXml(FILE* file)
ReplayPolicy::FinalizeXml();
}
+//------------------------------------------------------------------------
+// GetRandom: setup or access random state
+//
+// Return Value:
+// New or pre-existing random state.
+//
+// Notes:
+// Random state is kept per jit compilation request. Seed is partially
+// specified externally (via stress or policy setting) and partially
+// specified internally via method hash.
+
+CLRRandom* InlineStrategy::GetRandom()
+{
+ if (m_Random == nullptr)
+ {
+ int externalSeed = 0;
+
+#ifdef DEBUG
+
+ if (m_Compiler->compRandomInlineStress())
+ {
+ externalSeed = getJitStressLevel();
+ }
+
+#endif // DEBUG
+
+ int randomPolicyFlag = JitConfig.JitInlinePolicyRandom();
+ if (randomPolicyFlag != 0)
+ {
+ externalSeed = randomPolicyFlag;
+ }
+
+ int internalSeed = m_Compiler->info.compMethodHash();
+
+ assert(externalSeed != 0);
+ assert(internalSeed != 0);
+
+ int seed = externalSeed ^ internalSeed;
+
+ m_Random = new (m_Compiler, CMK_Inlining) CLRRandom();
+ m_Random->Init(seed);
+ }
+
+ return m_Random;
+}
+
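A purely illustrative consumer sketch: a random-based inline policy asking the strategy for this state. Only Init(seed) appears above, so the Next(...) call, the member names, and the threshold below are assumptions rather than code from this change.

    CLRRandom* random        = m_RootCompiler->m_inlineStrategy->GetRandom(); // per-compilation state
    const int  acceptPercent = 50;                                            // hypothetical threshold
    const bool accept        = (random->Next(100) < acceptPercent);           // assumed Next(max) API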
#endif // defined(DEBUG) || defined(INLINE_DATA)
//------------------------------------------------------------------------
diff --git a/src/jit/inline.def b/src/jit/inline.def
index 2c933fb8a9..ff0b21100e 100644
--- a/src/jit/inline.def
+++ b/src/jit/inline.def
@@ -40,7 +40,6 @@ INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs",
INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
-INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", FATAL, CALLEE)
INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoinline", FATAL, CALLEE)
@@ -78,6 +77,8 @@ INLINE_OBSERVATION(BELOW_ALWAYS_INLINE_SIZE, bool, "below ALWAYS_INLINE size"
INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class", INFORMATION, CALLEE)
INLINE_OBSERVATION(DOES_NOT_RETURN, bool, "does not return", INFORMATION, CALLEE)
INLINE_OBSERVATION(END_OPCODE_SCAN, bool, "done looking at opcodes", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_GC_STRUCT, bool, "has gc field in struct local", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE)
INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
INLINE_OBSERVATION(IL_CODE_SIZE, int, "number of bytes of IL", INFORMATION, CALLEE)
@@ -112,7 +113,6 @@ INLINE_OBSERVATION(HAS_NEWOBJ, bool, "has newobj",
// ------ Call Site Correctness -------
INLINE_OBSERVATION(ARG_HAS_NULL_THIS, bool, "this pointer argument is null", FATAL, CALLSITE)
-INLINE_OBSERVATION(ARG_HAS_SIDE_EFFECT, bool, "argument has side effect", FATAL, CALLSITE)
INLINE_OBSERVATION(ARG_IS_MKREFANY, bool, "argument is mkrefany", FATAL, CALLSITE)
INLINE_OBSERVATION(ARG_NO_BASH_TO_INT, bool, "argument can't bash to int", FATAL, CALLSITE)
INLINE_OBSERVATION(ARG_NO_BASH_TO_REF, bool, "argument can't bash to ref", FATAL, CALLSITE)
@@ -122,7 +122,6 @@ INLINE_OBSERVATION(CANT_EMBED_VARARGS_COOKIE, bool, "can't embed varargs cooki
INLINE_OBSERVATION(CLASS_INIT_FAILURE_SPEC, bool, "speculative class init failed", FATAL, CALLSITE)
INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE)
INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE)
-INLINE_OBSERVATION(CONDITIONAL_THROW, bool, "conditional throw", FATAL, CALLSITE)
INLINE_OBSERVATION(CROSS_BOUNDARY_CALLI, bool, "cross-boundary calli", FATAL, CALLSITE)
INLINE_OBSERVATION(CROSS_BOUNDARY_SECURITY, bool, "cross-boundary security check", FATAL, CALLSITE)
INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLSITE)
@@ -140,7 +139,7 @@ INLINE_OBSERVATION(IS_TOO_DEEP, bool, "too deep",
INLINE_OBSERVATION(IS_VIRTUAL, bool, "virtual", FATAL, CALLSITE)
INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLSITE)
INLINE_OBSERVATION(IS_WITHIN_CATCH, bool, "within catch region", FATAL, CALLSITE)
-INLINE_OBSERVATION(IS_WITHIN_FILTER, bool, "within filterregion", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_WITHIN_FILTER, bool, "within filter region", FATAL, CALLSITE)
INLINE_OBSERVATION(LDARGA_NOT_LOCAL_VAR, bool, "ldarga not on local var", FATAL, CALLSITE)
INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLSITE)
INLINE_OBSERVATION(LDVIRTFN_ON_NON_VIRTUAL, bool, "ldvirtfn on non-virtual", FATAL, CALLSITE)
@@ -149,6 +148,7 @@ INLINE_OBSERVATION(NOT_CANDIDATE, bool, "not inline candidate",
INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLSITE)
INLINE_OBSERVATION(OVER_BUDGET, bool, "inline exceeds budget", FATAL, CALLSITE)
INLINE_OBSERVATION(OVER_INLINE_LIMIT, bool, "limited by JitInlineLimit", FATAL, CALLSITE)
+INLINE_OBSERVATION(PIN_IN_TRY_REGION, bool, "within try region, pinned", FATAL, CALLSITE)
INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLSITE)
INLINE_OBSERVATION(REQUIRES_SAME_THIS, bool, "requires same this", FATAL, CALLSITE)
INLINE_OBSERVATION(RETURN_TYPE_MISMATCH, bool, "return type mismatch", FATAL, CALLSITE)
@@ -157,12 +157,14 @@ INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals",
// ------ Call Site Performance -------
+INLINE_OBSERVATION(RARE_GC_STRUCT, bool, "rarely called, has gc struct", INFORMATION, CALLSITE)
// ------ Call Site Information -------
INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE)
INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE)
INLINE_OBSERVATION(FREQUENCY, int, "rough call site frequency", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IN_TRY_REGION, bool, "call site in try region", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_SAME_THIS, bool, "same this as root caller", INFORMATION, CALLSITE)
INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLSITE)
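The INLINE_OBSERVATION rows above form an X-macro table: each entry names an observation, its value type, a description string, its impact class, and whether it describes the callee or the call site. A minimal sketch of the usual expansion pattern, assuming the enum and description table are generated the conventional way (the array name below is illustrative):

// Sketch of the X-macro expansion; s_descriptions is a hypothetical name.
enum class InlineObservation
{
#define INLINE_OBSERVATION(name, type, description, impact, target) target##_##name,
#include "inline.def"
#undef INLINE_OBSERVATION
};

static const char* const s_descriptions[] = {
#define INLINE_OBSERVATION(name, type, description, impact, target) description,
#include "inline.def"
#undef INLINE_OBSERVATION
};

Consumers refer to rows by generated name (for example InlineObservation::CALLEE_HAS_PINNED_LOCALS), so moving HAS_PINNED_LOCALS from the FATAL section to the INFORMATION section above changes only its classification, not any caller.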
diff --git a/src/jit/inline.h b/src/jit/inline.h
index e3d5750754..2634ebe6fa 100644
--- a/src/jit/inline.h
+++ b/src/jit/inline.h
@@ -85,11 +85,6 @@ const unsigned int MAX_INL_ARGS = 10; // does not include obj pointer
const unsigned int MAX_INL_LCLS = 8;
#endif // LEGACY_BACKEND
-// Flags lost during inlining.
-
-#define CORJIT_FLG_LOST_WHEN_INLINING \
- (CORJIT_FLG_BBOPT | CORJIT_FLG_BBINSTR | CORJIT_FLG_PROF_ENTERLEAVE | CORJIT_FLG_DEBUG_EnC | CORJIT_FLG_DEBUG_INFO)
-
// Forward declarations
class InlineStrategy;
@@ -542,6 +537,7 @@ struct InlLclVarInfo
var_types lclTypeInfo;
typeInfo lclVerTypeInfo;
bool lclHasLdlocaOp; // Is there LDLOCA(s) operation on this argument?
+ bool lclIsPinned;
};
// InlineInfo provides detailed information about a particular inline candidate.
@@ -568,12 +564,13 @@ struct InlineInfo
InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig
bool thisDereferencedFirst;
+ bool hasPinnedLocals;
#ifdef FEATURE_SIMD
bool hasSIMDTypeArgLocalOrReturn;
#endif // FEATURE_SIMD
GenTreeCall* iciCall; // The GT_CALL node to be inlined.
- GenTree* iciStmt; // The statement iciCall is in.
+ GenTreeStmt* iciStmt; // The statement iciCall is in.
BasicBlock* iciBlock; // The basic block iciStmt is in.
};
@@ -706,7 +703,7 @@ public:
InlineContext* NewSuccess(InlineInfo* inlineInfo);
// Create context for a failing inline.
- InlineContext* NewFailure(GenTree* stmt, InlineResult* inlineResult);
+ InlineContext* NewFailure(GenTreeStmt* stmt, InlineResult* inlineResult);
// Compiler associated with this strategy
Compiler* GetCompiler() const
@@ -823,6 +820,9 @@ public:
m_MethodXmlFilePosition = val;
}
+ // Set up or access random state (for use by RandomPolicy)
+ CLRRandom* GetRandom();
+
#endif // defined(DEBUG) || defined(INLINE_DATA)
// Some inline limit values
@@ -887,7 +887,8 @@ private:
bool m_HasForceViaDiscretionary;
#if defined(DEBUG) || defined(INLINE_DATA)
- long m_MethodXmlFilePosition;
+ long m_MethodXmlFilePosition;
+ CLRRandom* m_Random;
#endif // defined(DEBUG) || defined(INLINE_DATA)
};
diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp
index f80f3a5ec0..61e70c3ed4 100644
--- a/src/jit/inlinepolicy.cpp
+++ b/src/jit/inlinepolicy.cpp
@@ -27,22 +27,22 @@
InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot)
{
-#ifdef DEBUG
+#if defined(DEBUG) || defined(INLINE_DATA)
- // Optionally install the RandomPolicy.
- bool useRandomPolicy = compiler->compRandomInlineStress();
+#if defined(DEBUG)
+ const bool useRandomPolicyForStress = compiler->compRandomInlineStress();
+#else
+ const bool useRandomPolicyForStress = false;
+#endif // defined(DEBUG)
+
+ const bool useRandomPolicy = (JitConfig.JitInlinePolicyRandom() != 0);
- if (useRandomPolicy)
+ // Optionally install the RandomPolicy.
+ if (useRandomPolicyForStress || useRandomPolicy)
{
- unsigned seed = getJitStressLevel();
- assert(seed != 0);
- return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot, seed);
+ return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot);
}
-#endif // DEBUG
-
-#if defined(DEBUG) || defined(INLINE_DATA)
-
// Optionally install the ReplayPolicy.
bool useReplayPolicy = JitConfig.JitInlinePolicyReplay() != 0;
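Two knobs now route to the RandomPolicy: the DEBUG-only random inline stress mode and the retail-visible JitInlinePolicyRandom config value added later in jitconfigvalues.h, whose nonzero value doubles as the external random seed. A usage sketch, assuming the usual COMPlus_ environment prefix for JIT config values:

// Assumed environment spelling for the config values read above (COMPlus_ prefix):
//   COMPlus_JitInlinePolicyRandom=42   // nonzero selects RandomPolicy; the value feeds the seed
//   COMPlus_JitInlineDumpData=1        // optional: also collect the DiscretionaryPolicy data columns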
@@ -106,7 +106,7 @@ InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot)
void LegalPolicy::NoteFatal(InlineObservation obs)
{
// As a safeguard, all fatal impact must be
- // reported via noteFatal.
+ // reported via NoteFatal.
assert(InlGetImpact(obs) == InlineImpact::FATAL);
NoteInternal(obs);
assert(InlDecisionIsFailure(m_Decision));
@@ -243,7 +243,7 @@ void LegacyPolicy::NoteBool(InlineObservation obs, bool value)
InlineImpact impact = InlGetImpact(obs);
// As a safeguard, all fatal impact must be
- // reported via noteFatal.
+ // reported via NoteFatal.
assert(impact != InlineImpact::FATAL);
// Handle most information here
@@ -383,6 +383,12 @@ void LegacyPolicy::NoteBool(InlineObservation obs, bool value)
break;
}
+ case InlineObservation::CALLEE_HAS_PINNED_LOCALS:
+ // The legacy policy is to never inline methods with
+ // pinned locals.
+ SetNever(obs);
+ break;
+
default:
// Ignore the remainder for now
break;
@@ -443,16 +449,16 @@ void LegacyPolicy::NoteInt(InlineObservation obs, int value)
// Now that we know size and forceinline state,
// update candidacy.
- if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE)
- {
- // Candidate based on small size
- SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE);
- }
- else if (m_IsForceInline)
+ if (m_IsForceInline)
{
// Candidate based on force inline
SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
}
+ else if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE)
+ {
+ // Candidate based on small size
+ SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE);
+ }
else if (m_CodeSize <= m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize())
{
// Candidate, pending profitability evaluation
@@ -842,11 +848,21 @@ int LegacyPolicy::CodeSizeEstimate()
// NoteBool: handle a boolean observation with non-fatal impact
//
// Arguments:
-// obs - the current obsevation
+// obs - the current observation
// value - the value of the observation
void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value)
{
+
+#ifdef DEBUG
+ // Check the impact
+ InlineImpact impact = InlGetImpact(obs);
+
+ // As a safeguard, all fatal impact must be
+ // reported via NoteFatal.
+ assert(impact != InlineImpact::FATAL);
+#endif // DEBUG
+
switch (obs)
{
case InlineObservation::CALLEE_DOES_NOT_RETURN:
@@ -854,6 +870,36 @@ void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value)
m_IsNoReturnKnown = true;
break;
+ case InlineObservation::CALLSITE_RARE_GC_STRUCT:
+ // If this is a discretionary or always inline candidate
+ // with a gc struct, we may change our mind about inlining
+ // if the call site is rare, to avoid costs associated with
+ // zeroing the GC struct up in the root prolog.
+ if (m_Observation == InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE)
+ {
+ assert(m_CallsiteFrequency == InlineCallsiteFrequency::UNUSED);
+ SetFailure(obs);
+ return;
+ }
+ else if (m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE)
+ {
+ assert(m_CallsiteFrequency == InlineCallsiteFrequency::RARE);
+ SetFailure(obs);
+ return;
+ }
+ break;
+
+ case InlineObservation::CALLEE_HAS_PINNED_LOCALS:
+ if (m_CallsiteIsInTryRegion)
+ {
+ // Inlining a method with pinned locals in a try
+ // region requires wrapping the inline body in a
+ // try/finally to ensure unpinning. Bail instead.
+ SetFailure(InlineObservation::CALLSITE_PIN_IN_TRY_REGION);
+ return;
+ }
+ break;
+
default:
// Pass all other information to the legacy policy
LegacyPolicy::NoteBool(obs, value);
@@ -928,7 +974,7 @@ bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const
return propagate;
}
-#ifdef DEBUG
+#if defined(DEBUG) || defined(INLINE_DATA)
//------------------------------------------------------------------------
// RandomPolicy: construct a new RandomPolicy
@@ -936,89 +982,10 @@ bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const
// Arguments:
// compiler -- compiler instance doing the inlining (root compiler)
// isPrejitRoot -- true if this compiler is prejitting the root method
-// seed -- seed value for the random number generator
-
-RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed)
- : LegalPolicy(isPrejitRoot)
- , m_RootCompiler(compiler)
- , m_Random(nullptr)
- , m_CodeSize(0)
- , m_IsForceInline(false)
- , m_IsForceInlineKnown(false)
-{
- // If necessary, setup and seed the random state.
- if (compiler->inlRNG == nullptr)
- {
- compiler->inlRNG = new (compiler, CMK_Inlining) CLRRandom();
- unsigned hash = m_RootCompiler->info.compMethodHash();
- assert(hash != 0);
- assert(seed != 0);
- int hashSeed = static_cast<int>(hash ^ seed);
- compiler->inlRNG->Init(hashSeed);
- }
-
- m_Random = compiler->inlRNG;
-}
-
-//------------------------------------------------------------------------
-// NoteSuccess: handle finishing all the inlining checks successfully
-
-void RandomPolicy::NoteSuccess()
+RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
{
- assert(InlDecisionIsCandidate(m_Decision));
- m_Decision = InlineDecision::SUCCESS;
-}
-
-//------------------------------------------------------------------------
-// NoteBool: handle a boolean observation with non-fatal impact
-//
-// Arguments:
-// obs - the current obsevation
-// value - the value of the observation
-void RandomPolicy::NoteBool(InlineObservation obs, bool value)
-{
- // Check the impact
- InlineImpact impact = InlGetImpact(obs);
-
- // As a safeguard, all fatal impact must be
- // reported via noteFatal.
- assert(impact != InlineImpact::FATAL);
-
- // Handle most information here
- bool isInformation = (impact == InlineImpact::INFORMATION);
- bool propagate = !isInformation;
-
- if (isInformation)
- {
- switch (obs)
- {
- case InlineObservation::CALLEE_IS_FORCE_INLINE:
- // The RandomPolicy still honors force inlines.
- //
- // We may make the force-inline observation more than
- // once. All observations should agree.
- assert(!m_IsForceInlineKnown || (m_IsForceInline == value));
- m_IsForceInline = value;
- m_IsForceInlineKnown = true;
- break;
-
- case InlineObservation::CALLEE_HAS_SWITCH:
- case InlineObservation::CALLEE_UNSUPPORTED_OPCODE:
- // Pass these on, they should cause inlining to fail.
- propagate = true;
- break;
-
- default:
- // Ignore the remainder for now
- break;
- }
- }
-
- if (propagate)
- {
- NoteInternal(obs);
- }
+ m_Random = compiler->m_inlineStrategy->GetRandom();
}
//------------------------------------------------------------------------
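The seeding logic that used to live in the RandomPolicy constructor now sits behind InlineStrategy::GetRandom(), whose body is not part of this section. A hypothetical sketch of what it might look like, assuming it lazily allocates the shared CLRRandom and keeps the hash-xor-seed scheme of the deleted constructor (the seed-source fallback and the m_Compiler member name are assumptions):

// Hypothetical sketch only; not the actual InlineStrategy::GetRandom implementation.
CLRRandom* InlineStrategy::GetRandom()
{
    if (m_Random == nullptr)
    {
        m_Random = new (m_Compiler, CMK_Inlining) CLRRandom();

        // Assumed seed sources: the external config seed when set, else the stress level.
        int seed = JitConfig.JitInlinePolicyRandom();
        if (seed == 0)
        {
            seed = getJitStressLevel();
        }

        // Mix in the method hash, as the removed constructor did.
        const unsigned hash = m_Compiler->info.compMethodHash();
        m_Random->Init(static_cast<int>(hash ^ (unsigned)seed));
    }

    return m_Random;
}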
@@ -1032,7 +999,6 @@ void RandomPolicy::NoteInt(InlineObservation obs, int value)
{
switch (obs)
{
-
case InlineObservation::CALLEE_IL_CODE_SIZE:
{
assert(m_IsForceInlineKnown);
@@ -1054,7 +1020,8 @@ void RandomPolicy::NoteInt(InlineObservation obs, int value)
}
default:
- // Ignore all other information
+ // Defer to superclass for all other information
+ DiscretionaryPolicy::NoteInt(obs, value);
break;
}
}
@@ -1087,6 +1054,16 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
}
}
+ // If we're also dumping inline data, make additional observations
+ // based on the method info, and estimate code size and perf
+ // impact, so that the reports have the necessary data.
+ if (JitConfig.JitInlineDumpData() != 0)
+ {
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+ EstimatePerformanceImpact();
+ }
+
// Use a probability curve that roughly matches the observed
// behavior of the LegacyPolicy. That way we're inlining
// differently but not creating enormous methods.
@@ -1165,7 +1142,7 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
}
}
-#endif // DEBUG
+#endif // defined(DEBUG) || defined(INLINE_DATA)
#ifdef _MSC_VER
// Disable warning about new array member initialization behavior
@@ -1181,7 +1158,7 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
// clang-format off
DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
- : LegacyPolicy(compiler, isPrejitRoot)
+ : EnhancedLegacyPolicy(compiler, isPrejitRoot)
, m_Depth(0)
, m_BlockCount(0)
, m_Maxstack(0)
@@ -1227,6 +1204,7 @@ DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
, m_IsSameThis(false)
, m_CallerHasNewArray(false)
, m_CallerHasNewObj(false)
+ , m_CalleeHasGCStruct(false)
{
// Empty
}
@@ -1278,8 +1256,17 @@ void DiscretionaryPolicy::NoteBool(InlineObservation obs, bool value)
m_CallerHasNewObj = value;
break;
+ case InlineObservation::CALLEE_HAS_GC_STRUCT:
+ m_CalleeHasGCStruct = value;
+ break;
+
+ case InlineObservation::CALLSITE_RARE_GC_STRUCT:
+ // This is redundant since this policy tracks call site
+ // hotness for all candidates. So ignore.
+ break;
+
default:
- LegacyPolicy::NoteBool(obs, value);
+ EnhancedLegacyPolicy::NoteBool(obs, value);
break;
}
}
@@ -1295,7 +1282,6 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value)
{
switch (obs)
{
-
case InlineObservation::CALLEE_IL_CODE_SIZE:
// Override how code size is handled
{
@@ -1323,7 +1309,7 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value)
// on similarity of impact on codegen.
OPCODE opcode = static_cast<OPCODE>(value);
ComputeOpcodeBin(opcode);
- LegacyPolicy::NoteInt(obs, value);
+ EnhancedLegacyPolicy::NoteInt(obs, value);
break;
}
@@ -1344,8 +1330,8 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value)
break;
default:
- // Delegate remainder to the LegacyPolicy.
- LegacyPolicy::NoteInt(obs, value);
+ // Delegate remainder to the super class.
+ EnhancedLegacyPolicy::NoteInt(obs, value);
break;
}
}
@@ -1660,8 +1646,8 @@ void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo
// model for actual inlining.
EstimatePerformanceImpact();
- // Delegate to LegacyPolicy for the rest
- LegacyPolicy::DetermineProfitability(methodInfo);
+ // Delegate to super class for the rest
+ EnhancedLegacyPolicy::DetermineProfitability(methodInfo);
}
//------------------------------------------------------------------------
@@ -1869,7 +1855,7 @@ int DiscretionaryPolicy::CodeSizeEstimate()
void DiscretionaryPolicy::DumpSchema(FILE* file) const
{
- fprintf(file, ",ILSize");
+ fprintf(file, "ILSize");
fprintf(file, ",CallsiteFrequency");
fprintf(file, ",InstructionCount");
fprintf(file, ",LoadStoreCount");
@@ -1938,6 +1924,8 @@ void DiscretionaryPolicy::DumpSchema(FILE* file) const
fprintf(file, ",IsSameThis");
fprintf(file, ",CallerHasNewArray");
fprintf(file, ",CallerHasNewObj");
+ fprintf(file, ",CalleeDoesNotReturn");
+ fprintf(file, ",CalleeHasGCStruct");
}
//------------------------------------------------------------------------
@@ -1949,7 +1937,7 @@ void DiscretionaryPolicy::DumpSchema(FILE* file) const
void DiscretionaryPolicy::DumpData(FILE* file) const
{
- fprintf(file, ",%u", m_CodeSize);
+ fprintf(file, "%u", m_CodeSize);
fprintf(file, ",%u", m_CallsiteFrequency);
fprintf(file, ",%u", m_InstructionCount);
fprintf(file, ",%u", m_LoadStoreCount);
@@ -2018,6 +2006,8 @@ void DiscretionaryPolicy::DumpData(FILE* file) const
fprintf(file, ",%u", m_IsSameThis ? 1 : 0);
fprintf(file, ",%u", m_CallerHasNewArray ? 1 : 0);
fprintf(file, ",%u", m_CallerHasNewObj ? 1 : 0);
+ fprintf(file, ",%u", m_IsNoReturn ? 1 : 0);
+ fprintf(file, ",%u", m_CalleeHasGCStruct ? 1 : 0);
}
#endif // defined(DEBUG) || defined(INLINE_DATA)
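DumpSchema writes the CSV header row and DumpData writes a matching data row, so columns must be added and removed in lockstep; that is why both drop the stray leading comma and both gain the two new trailing columns. A hypothetical excerpt of the resulting output (values invented for illustration):

// ILSize,CallsiteFrequency,InstructionCount, ... ,CallerHasNewObj,CalleeDoesNotReturn,CalleeHasGCStruct
// 34,3,21, ... ,0,0,1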
@@ -2473,7 +2463,7 @@ bool ReplayPolicy::FindMethod()
// See if token matches
unsigned token = 0;
- int count = sscanf(buffer, " <Token>%u</Token> ", &token);
+ int count = sscanf_s(buffer, " <Token>%u</Token> ", &token);
if ((count != 1) || (token != methodToken))
{
continue;
@@ -2487,7 +2477,7 @@ bool ReplayPolicy::FindMethod()
// See if hash matches
unsigned hash = 0;
- count = sscanf(buffer, " <Hash>%u</Hash> ", &hash);
+ count = sscanf_s(buffer, " <Hash>%u</Hash> ", &hash);
if ((count != 1) || (hash != methodHash))
{
continue;
@@ -2646,7 +2636,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
// Match token
unsigned inlineToken = 0;
- int count = sscanf(buffer, " <Token>%u</Token> ", &inlineToken);
+ int count = sscanf_s(buffer, " <Token>%u</Token> ", &inlineToken);
if ((count != 1) || (inlineToken != token))
{
@@ -2661,7 +2651,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
// Match hash
unsigned inlineHash = 0;
- count = sscanf(buffer, " <Hash>%u</Hash> ", &inlineHash);
+ count = sscanf_s(buffer, " <Hash>%u</Hash> ", &inlineHash);
if ((count != 1) || (inlineHash != hash))
{
@@ -2676,7 +2666,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
// Match offset
unsigned inlineOffset = 0;
- count = sscanf(buffer, " <Offset>%u</Offset> ", &inlineOffset);
+ count = sscanf_s(buffer, " <Offset>%u</Offset> ", &inlineOffset);
if ((count != 1) || (inlineOffset != offset))
{
continue;
@@ -2695,7 +2685,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
if (fgets(buffer, sizeof(buffer), s_ReplayFile) != nullptr)
{
unsigned collectData = 0;
- count = sscanf(buffer, " <CollectData>%u</CollectData> ", &collectData);
+ count = sscanf_s(buffer, " <CollectData>%u</CollectData> ", &collectData);
if (count == 1)
{
diff --git a/src/jit/inlinepolicy.h b/src/jit/inlinepolicy.h
index 62031c86a0..3239dcbe89 100644
--- a/src/jit/inlinepolicy.h
+++ b/src/jit/inlinepolicy.h
@@ -98,6 +98,7 @@ public:
, m_HasSimd(false)
, m_LooksLikeWrapperMethod(false)
, m_MethodIsMostlyLoadStore(false)
+ , m_CallsiteIsInTryRegion(false)
{
// empty
}
@@ -165,6 +166,7 @@ protected:
bool m_HasSimd : 1;
bool m_LooksLikeWrapperMethod : 1;
bool m_MethodIsMostlyLoadStore : 1;
+ bool m_CallsiteIsInTryRegion : 1;
};
// EnhancedLegacyPolicy extends the legacy policy by rejecting
@@ -196,65 +198,15 @@ protected:
bool m_IsNoReturnKnown : 1;
};
-#ifdef DEBUG
-
-// RandomPolicy implements a policy that inlines at random.
-// It is mostly useful for stress testing.
-
-class RandomPolicy : public LegalPolicy
-{
-public:
- // Construct a RandomPolicy
- RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed);
-
- // Policy observations
- void NoteSuccess() override;
- void NoteBool(InlineObservation obs, bool value) override;
- void NoteInt(InlineObservation obs, int value) override;
-
- // Policy determinations
- void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
-
- // Policy policies
- bool PropagateNeverToRuntime() const override
- {
- return true;
- }
- bool IsLegacyPolicy() const override
- {
- return false;
- }
-
- // Policy estimates
- int CodeSizeEstimate() override
- {
- return 0;
- }
-
- const char* GetName() const override
- {
- return "RandomPolicy";
- }
-
-private:
- // Data members
- Compiler* m_RootCompiler;
- CLRRandom* m_Random;
- unsigned m_CodeSize;
- bool m_IsForceInline : 1;
- bool m_IsForceInlineKnown : 1;
-};
-
-#endif // DEBUG
-
-// DiscretionaryPolicy is a variant of the legacy policy. It differs
-// in that there is no ALWAYS_INLINE class, there is no IL size limit,
-// it does not try and maintain legacy compatabilty, and in prejit mode,
-// discretionary failures do not set the "NEVER" inline bit.
+// DiscretionaryPolicy is a variant of the enhanced legacy policy. It
+// differs in that there is no ALWAYS_INLINE class, there is no IL
+// size limit, it does not try to maintain legacy compatibility, and
+// in prejit mode, discretionary failures do not set the "NEVER"
+// inline bit.
//
// It is useful for gathering data about inline costs.
-class DiscretionaryPolicy : public LegacyPolicy
+class DiscretionaryPolicy : public EnhancedLegacyPolicy
{
public:
// Construct a DiscretionaryPolicy
@@ -266,10 +218,6 @@ public:
// Policy policies
bool PropagateNeverToRuntime() const override;
- bool IsLegacyPolicy() const override
- {
- return false;
- }
// Policy determinations
void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
@@ -346,6 +294,7 @@ protected:
bool m_IsSameThis;
bool m_CallerHasNewArray;
bool m_CallerHasNewObj;
+ bool m_CalleeHasGCStruct;
};
// ModelPolicy is an experimental policy that uses the results
@@ -382,6 +331,35 @@ public:
#if defined(DEBUG) || defined(INLINE_DATA)
+// RandomPolicy implements a policy that inlines at random.
+// It is mostly useful for stress testing.
+
+class RandomPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a RandomPolicy
+ RandomPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ const char* GetName() const override
+ {
+ return "RandomPolicy";
+ }
+
+private:
+ // Data members
+ CLRRandom* m_Random;
+};
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
// FullPolicy is an experimental policy that will always inline if
// possible, subject to externally settable depth and size limits.
//
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index d516e0dea4..edc4483c6b 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -149,8 +149,6 @@ const char* CodeGen::genSizeStr(emitAttr attr)
nullptr,
"xmmword ptr ",
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
- nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
- nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
"ymmword ptr"
};
@@ -3054,7 +3052,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr)
return
// If we are running the altjit for NGEN, then assume we can use the "BL" instruction.
// This matches the usual behavior for NGEN, since we normally do generate "BL".
- (!compiler->info.compMatchedVM && (compiler->opts.eeFlags & CORJIT_FLG_PREJIT)) ||
+ (!compiler->info.compMatchedVM && compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) ||
(compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24);
}
bool CodeGen::arm_Valid_Imm_For_BL(ssize_t addr)
@@ -3240,7 +3238,7 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
*
* Parameters
* srcType - source type
- * aligned - whether source is 16-byte aligned if srcType is a SIMD type
+ * aligned - whether source is properly aligned if srcType is a SIMD type
*/
instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*/)
{
@@ -3258,8 +3256,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*
#endif // FEATURE_SIMD
if (compiler->canUseAVX())
{
- // TODO-CQ: consider alignment of AVX vectors.
- return INS_movupd;
+ return (aligned) ? INS_movapd : INS_movupd;
}
else
{
@@ -3404,7 +3401,7 @@ instruction CodeGen::ins_Copy(var_types dstType)
*
* Parameters
* dstType - destination type
- * aligned - whether destination is 16-byte aligned if dstType is a SIMD type
+ * aligned - whether destination is properly aligned if dstType is a SIMD type
*/
instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/)
{
@@ -3422,8 +3419,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false
#endif // FEATURE_SIMD
if (compiler->canUseAVX())
{
- // TODO-CQ: consider alignment of AVX vectors.
- return INS_movupd;
+ return (aligned) ? INS_movapd : INS_movupd;
}
else
{
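With AVX enabled, ins_Load and ins_Store now honor the aligned hint instead of always falling back to unaligned moves. A minimal sketch of the call pattern, assuming the caller is inside codegen and knows the operand's alignment:

// Illustrative only: choosing the SIMD move based on known alignment.
var_types   simdType = TYP_SIMD16;
bool        aligned  = true; // e.g. a spill temp known to be 16-byte aligned
instruction loadIns  = ins_Load(simdType, aligned);  // movapd under AVX when aligned
instruction storeIns = ins_Store(simdType, aligned); // movupd would be chosen when aligned is false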
diff --git a/src/jit/instr.h b/src/jit/instr.h
index c38f8d2073..2d50234fdc 100644
--- a/src/jit/instr.h
+++ b/src/jit/instr.h
@@ -284,15 +284,19 @@ END_DECLARE_TYPED_ENUM(emitAttr,unsigned)
#define EmitSize(x) (EA_ATTR(genTypeSize(TypeGet(x))))
// Enum specifying the instruction set for generating floating point or SIMD code.
+// These enums are ordered such that each one is inclusive of previous instruction sets
+// and the VM ensures this as well when setting the CONFIG flags.
enum InstructionSet
{
#ifdef _TARGET_XARCH_
- InstructionSet_SSE2,
- InstructionSet_AVX,
+ InstructionSet_SSE2, // SSE2 Instruction set
+ InstructionSet_SSE3_4, // SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set
+ InstructionSet_AVX, // AVX2 instruction set
+    // TODO-Cleanup - This should be named InstructionSet_AVX2
#elif defined(_TARGET_ARM_)
InstructionSet_NEON,
#endif
- InstructionSet_NONE
+    InstructionSet_NONE       // No instruction set is available; indicates an invalid value
};
// clang-format on
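Because the enum is now documented as ordered by capability, a single ordered comparison can stand in for per-flag checks. A hedged sketch of the pattern this enables (the getSIMDInstructionSet helper name is an assumption here):

// Sketch, assuming a helper that reports the highest available instruction set.
InstructionSet level = compiler->getSIMDInstructionSet();

// Ordering makes "at least SSE3/SSSE3/SSE4.x" a single comparison; guard against the
// trailing InstructionSet_NONE sentinel, which sorts after the real sets.
if ((level != InstructionSet_NONE) && (level >= InstructionSet_SSE3_4))
{
    // Safe to emit SSE3/4-class instructions such as ptest or phaddd.
}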
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 436563babf..4317334bf2 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -178,6 +178,7 @@ INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CO
// These are the SSE instructions used on x86
INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
+INST3( pmovmskb, "pmovmskb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD7)) // Move the MSB bits of all bytes in a xmm reg to an int reg
INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E))
INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10))
@@ -317,6 +318,8 @@ INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality
INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit integers for equality
INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
+INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
+INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
@@ -367,25 +370,25 @@ INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0)
INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3)
INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3)
-#ifndef LEGACY_BACKEND
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548)
-#endif // !LEGACY_BACKEND
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4)
INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5)
-#ifndef LEGACY_BACKEND
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548)
-#endif // !LEGACY_BACKEND
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3)
INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3)
-#ifndef LEGACY_BACKEND
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48)
-#endif // !LEGACY_BACKEND
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA)
INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB)
-#ifndef LEGACY_BACKEND
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48)
-#endif // !LEGACY_BACKEND
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_)
INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC)
INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090)
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 7bf5cd4051..220294f825 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -28,6 +28,7 @@
#ifdef _MSC_VER
// These don't seem useful, so turning them off is no big deal
+#pragma warning(disable : 4065) // "switch statement contains 'default' but no 'case' labels" (happens due to #ifdefs)
#pragma warning(disable : 4510) // can't generate default constructor
#pragma warning(disable : 4511) // can't generate copy constructor
#pragma warning(disable : 4512) // can't generate assignment constructor
@@ -209,6 +210,7 @@
#include "corhdr.h"
#include "corjit.h"
+#include "jitee.h"
#define __OPERATOR_NEW_INLINE 1 // indicate that I will define these
#define __PLACEMENT_NEW_INLINE // don't bring in the global placement new, it is easy to make a mistake
@@ -259,6 +261,15 @@ struct CLRConfig
#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x)
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))
+#define FEATURE_PUT_STRUCT_ARG_STK 1
+#define PUT_STRUCT_ARG_STK_ONLY_ARG(x) , x
+#define PUT_STRUCT_ARG_STK_ONLY(x) x
+#else // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+#define PUT_STRUCT_ARG_STK_ONLY_ARG(x)
+#define PUT_STRUCT_ARG_STK_ONLY(x)
+#endif // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+
#if defined(UNIX_AMD64_ABI)
#define UNIX_AMD64_ABI_ONLY_ARG(x) , x
#define UNIX_AMD64_ABI_ONLY(x) x
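The new PUT_STRUCT_ARG_STK_ONLY_ARG / PUT_STRUCT_ARG_STK_ONLY pair follows the same conditional-compilation idiom as the UNIX_AMD64_ABI_ONLY macros just below it: splice an extra parameter or statement into a signature only when the feature is compiled in. A small usage sketch (the function and parameter names are made up):

// Hypothetical example of the conditional-argument idiom.
void genPutStructArg(GenTree* arg PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned slotCount))
{
    PUT_STRUCT_ARG_STK_ONLY(assert(slotCount != 0);)

    // ... code shared by all targets; when FEATURE_PUT_STRUCT_ARG_STK is not defined,
    // both macros expand to nothing and the extra parameter disappears.
}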
@@ -377,17 +388,6 @@ typedef ptrdiff_t ssize_t;
/*****************************************************************************/
-// Debugging support is ON by default. Can be turned OFF by
-// adding /DDEBUGGING_SUPPORT=0 on the command line.
-
-#ifndef DEBUGGING_SUPPORT
-#define DEBUGGING_SUPPORT
-#elif !DEBUGGING_SUPPORT
-#undef DEBUGGING_SUPPORT
-#endif
-
-/*****************************************************************************/
-
// Late disassembly is OFF by default. Can be turned ON by
// adding /DLATE_DISASM=1 on the command line.
// Always OFF in the non-debug version
@@ -465,6 +465,8 @@ typedef ptrdiff_t ssize_t;
#define MEASURE_NODE_SIZE 0 // Collect stats about GenTree node allocations.
#define MEASURE_PTRTAB_SIZE 0 // Collect stats about GC pointer table allocations.
#define EMITTER_STATS 0 // Collect stats on the emitter.
+#define NODEBASH_STATS 0 // Collect stats on changed gtOper values in GenTree's.
+#define COUNT_AST_OPERS 0 // Display use counts for GenTree operators.
#define VERBOSE_SIZES 0 // Always display GC info sizes. If set, DISPLAY_SIZES must also be set.
#define VERBOSE_VERIFY 0 // Dump additional information when verifying code. Useful to debug verification bugs.
@@ -472,9 +474,30 @@ typedef ptrdiff_t ssize_t;
#ifdef DEBUG
#define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats.
#define LOOP_HOIST_STATS 1 // Collect loop hoisting stats.
+#define TRACK_LSRA_STATS 1 // Collect LSRA stats
#else
#define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well
#define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well
+#define TRACK_LSRA_STATS 0 // You can set this to 1 to get LSRA stats in retail, as well
+#endif
+
+// Timing calls to clr.dll is only available under certain conditions.
+#ifndef FEATURE_JIT_METHOD_PERF
+#define MEASURE_CLRAPI_CALLS 0 // Can't time these calls without METHOD_PERF.
+#endif
+#ifdef DEBUG
+#define MEASURE_CLRAPI_CALLS 0 // No point in measuring DEBUG code.
+#endif
+#if !defined(_HOST_X86_) && !defined(_HOST_AMD64_)
+#define MEASURE_CLRAPI_CALLS 0 // Cycle counters only hooked up on x86/x64.
+#endif
+#if !defined(_MSC_VER) && !defined(__clang__)
+#define MEASURE_CLRAPI_CALLS 0 // Only know how to do this with VC and Clang.
+#endif
+
+// If none of the above set the flag to 0, it's available.
+#ifndef MEASURE_CLRAPI_CALLS
+#define MEASURE_CLRAPI_CALLS 0 // Set to 1 to measure time in ICorJitInfo calls.
#endif
/*****************************************************************************/
@@ -686,7 +709,7 @@ inline size_t unsigned_abs(ssize_t x)
/*****************************************************************************/
-#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC
class Histogram
{
@@ -807,7 +830,7 @@ extern int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
CORINFO_METHOD_INFO* methodInfo,
void** methodCodePtr,
ULONG* methodCodeSize,
- CORJIT_FLAGS* compileFlags,
+ JitFlags* compileFlags,
void* inlineInfoPtr);
#ifdef _HOST_64BIT_
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
index 9dbc225843..6c0474a00c 100644
--- a/src/jit/jit.settings.targets
+++ b/src/jit/jit.settings.targets
@@ -86,10 +86,11 @@
<CppCompile Include="..\jitconfig.cpp" />
<CppCompile Include="..\hostallocator.cpp" />
<CppCompile Include="..\objectalloc.cpp" />
- <CppCompile Inlcude="..\sideeffects.cpp" />
+ <CppCompile Include="..\sideeffects.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\CodeGenLegacy.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\Lower.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LSRA.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\codegenlinear.cpp" />
</ItemGroup>
<ItemGroup Condition="'$(TargetArch)'=='i386'">
<CppCompile Include="..\emitXArch.cpp" />
diff --git a/src/jit/jitconfig.h b/src/jit/jitconfig.h
index d5b4e30796..9186e12982 100644
--- a/src/jit/jitconfig.h
+++ b/src/jit/jitconfig.h
@@ -5,6 +5,8 @@
#ifndef _JITCONFIG_H_
#define _JITCONFIG_H_
+#include "switches.h"
+
struct CORINFO_SIG_INFO;
class ICorJitHost;
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 6579817249..39a2505246 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -17,10 +17,10 @@ CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailu
// verification failure
CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able
CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics
-CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT memory usage statistics
-CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console
-CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by
- // RyuJIT where possible
+CONFIG_INTEGER(DisplayLsraStats, W("JitLsraStats"), 0) // Display JIT Linear Scan Register Allocator statistics
+CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console
+CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by
+ // RyuJIT where possible
CONFIG_INTEGER(InterpreterFallback, W("InterpreterFallback"), 0) // Fallback to the interpreter when the JIT compiler
// fails
CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0)
@@ -154,10 +154,12 @@ CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // D
// exception handling
CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
-CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
-CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
-CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
-CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on the method
+CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2) // Number of times to repeat opts when repeating
+CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
+CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
+CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
+CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
CONFIG_METHODSET(NgenGCDump, W("NgenGCDump"))
CONFIG_METHODSET(NgenUnwindDump, W("NgenUnwindDump")) // Dump the unwind codes for the method
CONFIG_STRING(JitDumpFg, W("JitDumpFg")) // Dumps Xml/Dot Flowgraph for specified method
@@ -186,6 +188,10 @@ CONFIG_STRING(NgenDumpIRFormat, W("NgenDumpIRFormat")) // Same as JitD
CONFIG_STRING(NgenDumpIRPhase, W("NgenDumpIRPhase")) // Same as JitDumpIRPhase, but for ngen
#endif // defined(DEBUG)
+#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
+CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks
+#endif
+
// AltJitAssertOnNYI should be 0 on targets where JIT is under developement or bring up stage, so as to facilitate
// fallback to main JIT on hitting a NYI.
#if defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
@@ -194,11 +200,17 @@ CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the Alt
CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
#endif // defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableSSE3_4, W("EnableSSE3_4"), 1) // Enable SSE3, SSSE3, SSE 4.1 and 4.2 instruction set as default
+#endif
+
#if defined(_TARGET_AMD64_)
-CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default
-#else // !defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default.
+// When both AVX and SSE3_4 are set, we will use the most capable instruction set available
+// which will prefer AVX over SSE3/4.
+#else // !defined(_TARGET_AMD64_)
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default
-#endif // defined(_TARGET_AMD64_)
+#endif // defined(_TARGET_AMD64_)
#if !defined(DEBUG) && !defined(_DEBUG)
CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
@@ -206,9 +218,17 @@ CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 1)
#endif // !defined(DEBUG) && !defined(_DEBUG)
+// The following should be wrapped inside "#if MEASURE_MEM_ALLOC / #endif", but
+// some files include this one without bringing in the definitions from "jit.h"
+// so we don't always know what the "true" value of that flag should be. For now
+// we take the easy way out and always include the flag, even in release builds
+// (normally MEASURE_MEM_ALLOC is off for release builds but if it's toggled on
+// for release in "jit.h" the flag would be missing for some includers).
+// TODO-Cleanup: need to make 'MEASURE_MEM_ALLOC' well-defined here at all times.
+CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT memory usage statistics
+
CONFIG_INTEGER(JitAggressiveInlining, W("JitAggressiveInlining"), 0) // Aggressive inlining of all methods
-CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // On ARM, setting this will emit Enter/Leave/TailCall
- // callbacks
+CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // If 1, emit Enter/Leave/TailCall callbacks
CONFIG_INTEGER(JitInlineSIMDMultiplier, W("JitInlineSIMDMultiplier"), 3)
#if defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
@@ -242,6 +262,8 @@ CONFIG_INTEGER(JitInlineLimit, W("JitInlineLimit"), -1)
CONFIG_INTEGER(JitInlinePolicyDiscretionary, W("JitInlinePolicyDiscretionary"), 0)
CONFIG_INTEGER(JitInlinePolicyFull, W("JitInlinePolicyFull"), 0)
CONFIG_INTEGER(JitInlinePolicySize, W("JitInlinePolicySize"), 0)
+CONFIG_INTEGER(JitInlinePolicyRandom, W("JitInlinePolicyRandom"), 0) // nonzero enables; value is the external random
+ // seed
CONFIG_INTEGER(JitInlinePolicyReplay, W("JitInlinePolicyReplay"), 0)
CONFIG_STRING(JitNoInlineRange, W("JitNoInlineRange"))
CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile"))
@@ -250,6 +272,8 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile"))
CONFIG_INTEGER(JitInlinePolicyLegacy, W("JitInlinePolicyLegacy"), 0)
CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
+CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0)
+
#undef CONFIG_INTEGER
#undef CONFIG_STRING
#undef CONFIG_METHODSET
diff --git a/src/jit/jitee.h b/src/jit/jitee.h
new file mode 100644
index 0000000000..f9bd83f5bb
--- /dev/null
+++ b/src/jit/jitee.h
@@ -0,0 +1,264 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// This class wraps the CORJIT_FLAGS type in the JIT-EE interface (in corjit.h) such that the JIT can
+// build with either the old flags (COR_JIT_EE_VERSION <= 460) or the new flags (COR_JIT_EE_VERSION > 460).
+// It actually is exactly the same as the new definition, and must be kept up-to-date with the new definition.
+// When built against an old JIT-EE interface, the old flags are converted into this structure.
+class JitFlags
+{
+public:
+ // clang-format off
+ enum JitFlag
+ {
+ JIT_FLAG_SPEED_OPT = 0,
+ JIT_FLAG_SIZE_OPT = 1,
+ JIT_FLAG_DEBUG_CODE = 2, // generate "debuggable" code (no code-mangling optimizations)
+ JIT_FLAG_DEBUG_EnC = 3, // We are in Edit-n-Continue mode
+ JIT_FLAG_DEBUG_INFO = 4, // generate line and local-var info
+        JIT_FLAG_MIN_OPT = 5, // disable all jit optimizations (not necessarily debuggable code)
+ JIT_FLAG_GCPOLL_CALLS = 6, // Emit calls to JIT_POLLGC for thread suspension.
+ JIT_FLAG_MCJIT_BACKGROUND = 7, // Calling from multicore JIT background thread, do not call JitComplete
+
+ #if defined(_TARGET_X86_)
+
+ JIT_FLAG_PINVOKE_RESTORE_ESP = 8, // Restore ESP after returning from inlined PInvoke
+ JIT_FLAG_TARGET_P4 = 9,
+ JIT_FLAG_USE_FCOMI = 10, // Generated code may use fcomi(p) instruction
+ JIT_FLAG_USE_CMOV = 11, // Generated code may use cmov instruction
+ JIT_FLAG_USE_SSE2 = 12, // Generated code may use SSE-2 instructions
+
+ #else // !defined(_TARGET_X86_)
+
+ JIT_FLAG_UNUSED1 = 8,
+ JIT_FLAG_UNUSED2 = 9,
+ JIT_FLAG_UNUSED3 = 10,
+ JIT_FLAG_UNUSED4 = 11,
+ JIT_FLAG_UNUSED5 = 12,
+
+ #endif // !defined(_TARGET_X86_)
+
+ #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+
+ JIT_FLAG_USE_SSE3_4 = 13,
+ JIT_FLAG_USE_AVX = 14,
+ JIT_FLAG_USE_AVX2 = 15,
+ JIT_FLAG_USE_AVX_512 = 16,
+ JIT_FLAG_FEATURE_SIMD = 17,
+
+ #else // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_)
+
+ JIT_FLAG_UNUSED6 = 13,
+ JIT_FLAG_UNUSED7 = 14,
+ JIT_FLAG_UNUSED8 = 15,
+ JIT_FLAG_UNUSED9 = 16,
+ JIT_FLAG_UNUSED10 = 17,
+
+ #endif // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_)
+
+ JIT_FLAG_MAKEFINALCODE = 18, // Use the final code generator, i.e., not the interpreter.
+ JIT_FLAG_READYTORUN = 19, // Use version-resilient code generation
+ JIT_FLAG_PROF_ENTERLEAVE = 20, // Instrument prologues/epilogues
+ JIT_FLAG_PROF_REJIT_NOPS = 21, // Insert NOPs to ensure code is re-jitable
+ JIT_FLAG_PROF_NO_PINVOKE_INLINE = 22, // Disables PInvoke inlining
+ JIT_FLAG_SKIP_VERIFICATION = 23, // (lazy) skip verification - determined without doing a full resolve. See comment below
+ JIT_FLAG_PREJIT = 24, // jit or prejit is the execution engine.
+ JIT_FLAG_RELOC = 25, // Generate relocatable code
+ JIT_FLAG_IMPORT_ONLY = 26, // Only import the function
+ JIT_FLAG_IL_STUB = 27, // method is an IL stub
+ JIT_FLAG_PROCSPLIT = 28, // JIT should separate code into hot and cold sections
+ JIT_FLAG_BBINSTR = 29, // Collect basic block profile information
+ JIT_FLAG_BBOPT = 30, // Optimize method based on profile information
+ JIT_FLAG_FRAMED = 31, // All methods have an EBP frame
+ JIT_FLAG_ALIGN_LOOPS = 32, // add NOPs before loops to align them at 16 byte boundaries
+ JIT_FLAG_PUBLISH_SECRET_PARAM = 33, // JIT must place stub secret param into local 0. (used by IL stubs)
+ JIT_FLAG_GCPOLL_INLINE = 34, // JIT must inline calls to GCPoll when possible
+ JIT_FLAG_SAMPLING_JIT_BACKGROUND = 35, // JIT is being invoked as a result of stack sampling for hot methods in the background
+ JIT_FLAG_USE_PINVOKE_HELPERS = 36, // The JIT should use the PINVOKE_{BEGIN,END} helpers instead of emitting inline transitions
+ JIT_FLAG_REVERSE_PINVOKE = 37, // The JIT should insert REVERSE_PINVOKE_{ENTER,EXIT} helpers into method prolog/epilog
+ JIT_FLAG_DESKTOP_QUIRKS = 38, // The JIT should generate desktop-quirk-compatible code
+ };
+ // clang-format on
+
+ JitFlags() : m_jitFlags(0)
+ {
+ // empty
+ }
+
+    // Convenience constructor to set exactly one flag.
+ JitFlags(JitFlag flag) : m_jitFlags(0)
+ {
+ Set(flag);
+ }
+
+ void Reset()
+ {
+ m_jitFlags = 0;
+ }
+
+ void Set(JitFlag flag)
+ {
+ m_jitFlags |= 1ULL << (unsigned __int64)flag;
+ }
+
+ void Clear(JitFlag flag)
+ {
+ m_jitFlags &= ~(1ULL << (unsigned __int64)flag);
+ }
+
+ bool IsSet(JitFlag flag) const
+ {
+ return (m_jitFlags & (1ULL << (unsigned __int64)flag)) != 0;
+ }
+
+ void Add(const JitFlags& other)
+ {
+ m_jitFlags |= other.m_jitFlags;
+ }
+
+ void Remove(const JitFlags& other)
+ {
+ m_jitFlags &= ~other.m_jitFlags;
+ }
+
+ bool IsEmpty() const
+ {
+ return m_jitFlags == 0;
+ }
+
+#if COR_JIT_EE_VERSION <= 460
+
+ void SetFromOldFlags(unsigned corJitFlags, unsigned corJitFlags2)
+ {
+ Reset();
+
+#define CONVERT_OLD_FLAG(oldf, newf) \
+ if ((corJitFlags & (oldf)) != 0) \
+ this->Set(JitFlags::newf);
+#define CONVERT_OLD_FLAG2(oldf, newf) \
+ if ((corJitFlags2 & (oldf)) != 0) \
+ this->Set(JitFlags::newf);
+
+ CONVERT_OLD_FLAG(CORJIT_FLG_SPEED_OPT, JIT_FLAG_SPEED_OPT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_SIZE_OPT, JIT_FLAG_SIZE_OPT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_CODE, JIT_FLAG_DEBUG_CODE)
+ CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC)
+ CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO)
+ CONVERT_OLD_FLAG(CORJIT_FLG_MIN_OPT, JIT_FLAG_MIN_OPT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_CALLS, JIT_FLAG_GCPOLL_CALLS)
+ CONVERT_OLD_FLAG(CORJIT_FLG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND)
+
+#if defined(_TARGET_X86_)
+
+ CONVERT_OLD_FLAG(CORJIT_FLG_PINVOKE_RESTORE_ESP, JIT_FLAG_PINVOKE_RESTORE_ESP)
+ CONVERT_OLD_FLAG(CORJIT_FLG_TARGET_P4, JIT_FLAG_TARGET_P4)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_FCOMI, JIT_FLAG_USE_FCOMI)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_CMOV, JIT_FLAG_USE_CMOV)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE2, JIT_FLAG_USE_SSE2)
+
+#elif defined(_TARGET_AMD64_)
+
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE3_4, JIT_FLAG_USE_SSE3_4)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX, JIT_FLAG_USE_AVX)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX2, JIT_FLAG_USE_AVX2)
+ CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX_512, JIT_FLAG_USE_AVX_512)
+ CONVERT_OLD_FLAG(CORJIT_FLG_FEATURE_SIMD, JIT_FLAG_FEATURE_SIMD)
+
+#endif // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_)
+
+ CONVERT_OLD_FLAG(CORJIT_FLG_MAKEFINALCODE, JIT_FLAG_MAKEFINALCODE)
+ CONVERT_OLD_FLAG(CORJIT_FLG_READYTORUN, JIT_FLAG_READYTORUN)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PROF_REJIT_NOPS, JIT_FLAG_PROF_REJIT_NOPS)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE)
+ CONVERT_OLD_FLAG(CORJIT_FLG_SKIP_VERIFICATION, JIT_FLAG_SKIP_VERIFICATION)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PREJIT, JIT_FLAG_PREJIT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_RELOC, JIT_FLAG_RELOC)
+ CONVERT_OLD_FLAG(CORJIT_FLG_IMPORT_ONLY, JIT_FLAG_IMPORT_ONLY)
+ CONVERT_OLD_FLAG(CORJIT_FLG_IL_STUB, JIT_FLAG_IL_STUB)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PROCSPLIT, JIT_FLAG_PROCSPLIT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_BBINSTR, JIT_FLAG_BBINSTR)
+ CONVERT_OLD_FLAG(CORJIT_FLG_BBOPT, JIT_FLAG_BBOPT)
+ CONVERT_OLD_FLAG(CORJIT_FLG_FRAMED, JIT_FLAG_FRAMED)
+ CONVERT_OLD_FLAG(CORJIT_FLG_ALIGN_LOOPS, JIT_FLAG_ALIGN_LOOPS)
+ CONVERT_OLD_FLAG(CORJIT_FLG_PUBLISH_SECRET_PARAM, JIT_FLAG_PUBLISH_SECRET_PARAM)
+ CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_INLINE, JIT_FLAG_GCPOLL_INLINE)
+
+ CONVERT_OLD_FLAG2(CORJIT_FLG2_SAMPLING_JIT_BACKGROUND, JIT_FLAG_SAMPLING_JIT_BACKGROUND)
+
+#undef CONVERT_OLD_FLAG
+#undef CONVERT_OLD_FLAG2
+ }
+
+#else // COR_JIT_EE_VERSION > 460
+
+ void SetFromFlags(CORJIT_FLAGS flags)
+ {
+        // We don't want to have to check every one, so we assume they are exactly the same values as the JitFlag
+ // values defined in this type.
+ m_jitFlags = flags.GetFlagsRaw();
+
+ C_ASSERT(sizeof(m_jitFlags) == sizeof(CORJIT_FLAGS));
+
+#define FLAGS_EQUAL(a, b) C_ASSERT((unsigned)(a) == (unsigned)(b))
+
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SPEED_OPT, JIT_FLAG_SPEED_OPT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SIZE_OPT, JIT_FLAG_SIZE_OPT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_CODE, JIT_FLAG_DEBUG_CODE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MIN_OPT, JIT_FLAG_MIN_OPT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_GCPOLL_CALLS, JIT_FLAG_GCPOLL_CALLS);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND);
+
+#if defined(_TARGET_X86_)
+
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PINVOKE_RESTORE_ESP, JIT_FLAG_PINVOKE_RESTORE_ESP);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4, JIT_FLAG_TARGET_P4);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_FCOMI, JIT_FLAG_USE_FCOMI);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_CMOV, JIT_FLAG_USE_CMOV);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE2, JIT_FLAG_USE_SSE2);
+
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3_4, JIT_FLAG_USE_SSE3_4);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, JIT_FLAG_USE_AVX);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, JIT_FLAG_USE_AVX2);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX_512, JIT_FLAG_USE_AVX_512);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD, JIT_FLAG_FEATURE_SIMD);
+
+#endif
+
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MAKEFINALCODE, JIT_FLAG_MAKEFINALCODE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_READYTORUN, JIT_FLAG_READYTORUN);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_REJIT_NOPS, JIT_FLAG_PROF_REJIT_NOPS);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SKIP_VERIFICATION, JIT_FLAG_SKIP_VERIFICATION);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PREJIT, JIT_FLAG_PREJIT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELOC, JIT_FLAG_RELOC);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_IMPORT_ONLY, JIT_FLAG_IMPORT_ONLY);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_IL_STUB, JIT_FLAG_IL_STUB);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROCSPLIT, JIT_FLAG_PROCSPLIT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR, JIT_FLAG_BBINSTR);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_BBOPT, JIT_FLAG_BBOPT);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_FRAMED, JIT_FLAG_FRAMED);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ALIGN_LOOPS, JIT_FLAG_ALIGN_LOOPS);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PUBLISH_SECRET_PARAM, JIT_FLAG_PUBLISH_SECRET_PARAM);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_GCPOLL_INLINE, JIT_FLAG_GCPOLL_INLINE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SAMPLING_JIT_BACKGROUND, JIT_FLAG_SAMPLING_JIT_BACKGROUND);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_PINVOKE_HELPERS, JIT_FLAG_USE_PINVOKE_HELPERS);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_REVERSE_PINVOKE, JIT_FLAG_REVERSE_PINVOKE);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DESKTOP_QUIRKS, JIT_FLAG_DESKTOP_QUIRKS);
+
+#undef FLAGS_EQUAL
+ }
+
+#endif // COR_JIT_EE_VERSION > 460
+
+private:
+ unsigned __int64 m_jitFlags;
+};
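A short usage sketch of the wrapper above, using only the members it defines (the specific flags chosen are just examples); note how a set of flags can replace the old CORJIT_FLG_LOST_WHEN_INLINING mask that this change deletes from inline.h:

// Example: building, combining, and querying a JitFlags value.
JitFlags flags;
flags.Set(JitFlags::JIT_FLAG_DEBUG_INFO);
flags.Set(JitFlags::JIT_FLAG_PREJIT);

if (flags.IsSet(JitFlags::JIT_FLAG_PREJIT))
{
    // prejit-only handling
}

// The flags formerly grouped by CORJIT_FLG_LOST_WHEN_INLINING can be expressed
// as a JitFlags value and stripped in bulk.
JitFlags lostWhenInlining;
lostWhenInlining.Set(JitFlags::JIT_FLAG_BBOPT);
lostWhenInlining.Set(JitFlags::JIT_FLAG_BBINSTR);
lostWhenInlining.Set(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
lostWhenInlining.Set(JitFlags::JIT_FLAG_DEBUG_EnC);
lostWhenInlining.Set(JitFlags::JIT_FLAG_DEBUG_INFO);

flags.Remove(lostWhenInlining);
assert(!flags.IsSet(JitFlags::JIT_FLAG_DEBUG_INFO));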
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
index b20c2f8a9a..4b3ceaecf6 100644
--- a/src/jit/jiteh.cpp
+++ b/src/jit/jiteh.cpp
@@ -2979,7 +2979,7 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus
// Note: the flags field is kind of weird. It should be compared for equality
// to determine the type of clause, even though it looks like a bitfield. In
// Particular, CORINFO_EH_CLAUSE_NONE is zero, so you can "&" to check it.
- // You do need to mask off the bits, though, because COR_ILEXCEPTION_CLAUSE_DUPLICATED
+ // You do need to mask off the bits, though, because CORINFO_EH_CLAUSE_DUPLICATE
// is and'ed in.
const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7;
switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK)
@@ -3013,15 +3013,19 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus
}
if ((clause.TryOffset == clause.TryLength) && (clause.TryOffset == clause.HandlerOffset) &&
- ((clause.Flags & (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)) ==
- (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)))
+ ((clause.Flags & (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY)) ==
+ (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY)))
{
printf(" cloned finally");
}
- else if (clause.Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED)
+ else if (clause.Flags & CORINFO_EH_CLAUSE_DUPLICATE)
{
printf(" duplicated");
}
+ else if (clause.Flags & CORINFO_EH_CLAUSE_SAMETRY)
+ {
+ printf(" same try");
+ }
printf("\n");
}
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index b93ac3376c..3f8d8afe88 100644
--- a/src/jit/jitgcinfo.h
+++ b/src/jit/jitgcinfo.h
@@ -380,6 +380,9 @@ private:
public:
void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc);
#endif // !LEGACY_BACKEND
+
+private:
+ ReturnKind getReturnKind();
};
inline unsigned char encodeUnsigned(BYTE* dest, unsigned value)
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 369c96322d..ea9c573a02 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -385,8 +385,9 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
if (simdBaseType != TYP_UNKNOWN)
{
assert(varTypeIsSIMD(type));
- varDsc->lvSIMDType = true;
- varDsc->lvBaseType = simdBaseType;
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType = simdBaseType;
+ varDsc->lvExactSize = genTypeSize(type);
}
}
#endif // FEATURE_SIMD
@@ -1448,12 +1449,16 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
#if 1 // TODO-Cleanup: Consider removing this entire #if block in the future
- // This method has two callers. The one in Importer.cpp passes sortFields == false
- // and the other passes sortFields == true.
- // This is a workaround that leave the inlining behavior the same and before while still
- // performing extra struct promotions when compiling the method.
- //
+// This method has two callers. The one in Importer.cpp passes sortFields == false
+// and the other passes sortFields == true.
+// This is a workaround that leaves the inlining behavior the same as before while still
+// performing extra struct promotions when compiling the method.
+//
+// The x86 legacy back-end can't handle the more general RyuJIT struct promotion (notably structs
+// with holes), in genPushArgList(), so in that case always check for custom layout.
+#if FEATURE_FIXED_OUT_ARGS || !defined(LEGACY_BACKEND)
if (!sortFields) // the condition "!sortFields" really means "we are inlining"
+#endif
{
treatAsOverlapping = StructHasCustomLayout(typeFlags);
}
@@ -1736,7 +1741,7 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
}
}
-#if !defined(_TARGET_64BIT_)
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
//------------------------------------------------------------------------
// lvaPromoteLongVars: "Struct promote" all register candidate longs as if they are structs of two ints.
//
@@ -1752,29 +1757,18 @@ void Compiler::lvaPromoteLongVars()
{
return;
}
+
// The lvaTable might grow as we grab temps. Make a local copy here.
unsigned startLvaCount = lvaCount;
for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
{
LclVarDsc* varDsc = &lvaTable[lclNum];
if (!varTypeIsLong(varDsc) || varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegArgOrRet() ||
- (varDsc->lvRefCnt == 0))
+ (varDsc->lvRefCnt == 0) || varDsc->lvIsStructField || (fgNoStructPromotion && varDsc->lvIsParam))
{
continue;
}
- // Will this work ???
- // We can't have nested promoted structs.
- if (varDsc->lvIsStructField)
- {
- if (lvaGetPromotionType(varDsc->lvParentLcl) != PROMOTION_TYPE_INDEPENDENT)
- {
- continue;
- }
- varDsc->lvIsStructField = false;
- varDsc->lvTracked = false;
- }
-
varDsc->lvFieldCnt = 2;
varDsc->lvFieldLclStart = lvaCount;
varDsc->lvPromoted = true;
@@ -1823,7 +1817,7 @@ void Compiler::lvaPromoteLongVars()
}
#endif // DEBUG
}
-#endif // !_TARGET_64BIT_
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
/*****************************************************************************
* Given a fldOffset in a promoted struct var, return the index of the local
@@ -1904,6 +1898,10 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister
JITDUMP("it is a struct\n");
assert(varTypeIsStruct(varDsc));
break;
+ case DNER_IsStructArg:
+ JITDUMP("it is a struct arg\n");
+ assert(varTypeIsStruct(varDsc));
+ break;
case DNER_BlockOp:
JITDUMP("written in a block op\n");
varDsc->lvLclBlockOpAddr = 1;
@@ -2038,7 +2036,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
}
#ifndef _TARGET_64BIT_
- bool fDoubleAlignHint = FALSE;
+ BOOL fDoubleAlignHint = FALSE;
#ifdef _TARGET_X86_
fDoubleAlignHint = TRUE;
#endif
@@ -2697,6 +2695,10 @@ void Compiler::lvaSortByRefCount()
lvaTrackedCount = 0;
lvaTrackedCountInSizeTUnits = 0;
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeEmpty(this));
+#endif
+
if (lvaCount == 0)
{
return;
@@ -3386,26 +3388,30 @@ void Compiler::lvaMarkLocalVars()
#endif // !FEATURE_EH_FUNCLETS
-#if FEATURE_EH_FUNCLETS
- if (ehNeedsPSPSym())
+ // PSPSym and LocAllocSPvar are not used by the CoreRT ABI
+ if (!IsTargetAbi(CORINFO_CORERT_ABI))
{
- lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
- LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym];
- lclPSPSym->lvType = TYP_I_IMPL;
- }
+#if FEATURE_EH_FUNCLETS
+ if (ehNeedsPSPSym())
+ {
+ lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
+ LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym];
+ lclPSPSym->lvType = TYP_I_IMPL;
+ }
#endif // FEATURE_EH_FUNCLETS
- if (compLocallocUsed)
- {
- lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
- LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar];
- locAllocSPvar->lvType = TYP_I_IMPL;
+ // TODO: LocAllocSPvar should only be required by the implicit frame layout expected by the VM on x86.
+ // It should be removed on other platforms once we check there are no other implicit dependencies.
+ if (compLocallocUsed)
+ {
+ lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
+ LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar];
+ locAllocSPvar->lvType = TYP_I_IMPL;
+ }
}
BasicBlock* block;
-#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
#ifndef DEBUG
// Assign slot numbers to all variables.
// If compiler generated local variables, slot numbers will be
@@ -3428,8 +3434,6 @@ void Compiler::lvaMarkLocalVars()
}
}
-#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
-
/* Mark all local variable references */
lvaRefCountingStarted = true;
@@ -4062,12 +4066,11 @@ void Compiler::lvaFixVirtualFrameOffsets()
LclVarDsc* varDsc;
#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
- if (ehNeedsPSPSym())
+ if (lvaPSPSym != BAD_VAR_NUM)
{
// We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space.
// Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between
// the PSPSym and the outgoing argument space.
- assert(lvaPSPSym != BAD_VAR_NUM);
varDsc = &lvaTable[lvaPSPSym];
assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
assert(!varDsc->lvMustInit); // It is never "must init".
@@ -4453,7 +4456,9 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
noway_assert(argSize);
if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ {
argOffs -= argSize;
+ }
unsigned fieldVarNum = BAD_VAR_NUM;
@@ -4543,7 +4548,9 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
}
if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ {
argOffs += argSize;
+ }
return argOffs;
}
@@ -4973,13 +4980,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
#endif //_TARGET_AMD64_
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
- if (ehNeedsPSPSym())
+ if (lvaPSPSym != BAD_VAR_NUM)
{
// On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including
// padding (so we can avoid computing the same padding in the funclet
// frame). Note that there is no special padding requirement for the PSPSym.
noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
- assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
}
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
@@ -5033,7 +5039,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs);
}
- if (compLocallocUsed)
+ if (lvaLocAllocSPvar != BAD_VAR_NUM)
{
#ifdef JIT32_GCENCODER
noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
@@ -5278,7 +5284,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
// a local variable which will need stack frame space.
//
if (!varDsc->lvIsRegArg)
+ {
continue;
+ }
#ifdef _TARGET_ARM64_
if (info.compIsVarArgs)
@@ -5477,13 +5485,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
}
#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
- if (ehNeedsPSPSym())
+ if (lvaPSPSym != BAD_VAR_NUM)
{
// On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument
// space. Any padding will be higher on the stack than this
// (including the padding added by lvaAlignFrame()).
noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
- assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
}
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
diff --git a/src/jit/legacyjit/.gitmirror b/src/jit/legacyjit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/legacyjit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/legacyjit/CMakeLists.txt b/src/jit/legacyjit/CMakeLists.txt
new file mode 100644
index 0000000000..73a4600a66
--- /dev/null
+++ b/src/jit/legacyjit/CMakeLists.txt
@@ -0,0 +1,62 @@
+project(legacyjit)
+
+add_definitions(-DLEGACY_BACKEND)
+add_definitions(-DALT_JIT)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+# No SIMD in legacy back-end.
+remove_definitions(-DFEATURE_SIMD)
+remove_definitions(-DFEATURE_AVX_SUPPORT)
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=legacyjit.dll)
+endif(WIN32)
+
+add_library_clr(legacyjit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(legacyjit jit_exports)
+
+set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(legacyjit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(legacyjit)
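+
+# Usage note (an assumed convention, not part of the build logic): because ALT_JIT is defined above,
+# this library is built as an alternate jit, typically enabled via COMPlus_AltJit=* together with
+# COMPlus_AltJitName=legacyjit.dll.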
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
index 94206def1c..35dd1815ef 100644
--- a/src/jit/lir.cpp
+++ b/src/jit/lir.cpp
@@ -190,12 +190,13 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
assert(IsDummyUse() || m_range->Contains(m_user));
assert(m_range->Contains(replacement));
- GenTree* replacedNode = *m_edge;
-
- *m_edge = replacement;
- if (!IsDummyUse() && m_user->IsCall())
+ if (!IsDummyUse())
+ {
+ m_user->ReplaceOperand(m_edge, replacement);
+ }
+ else
{
- compiler->fgFixupArgTabEntryPtr(m_user, replacedNode, replacement);
+ *m_edge = replacement;
}
}
@@ -256,7 +257,7 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, u
assert(m_range->Contains(m_user));
assert(m_range->Contains(*m_edge));
- GenTree* node = *m_edge;
+ GenTree* const node = *m_edge;
if (lclNum == BAD_VAR_NUM)
{
@@ -267,9 +268,11 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, u
compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
- GenTreeLclVar* store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar();
+ GenTreeLclVar* const store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar();
+ assert(store != nullptr);
+ assert(store->gtOp1 == node);
- GenTree* load =
+ GenTree* const load =
new (compiler, GT_LCL_VAR) GenTreeLclVar(store->TypeGet(), store->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
m_range->InsertAfter(node, store, load);
@@ -678,7 +681,7 @@ void LIR::Range::FinishInsertBefore(GenTree* insertionPoint, GenTree* first, Gen
assert(m_lastNode != nullptr);
assert(m_lastNode->gtNext == nullptr);
m_lastNode->gtNext = first;
- first->gtPrev = m_lastNode;
+ first->gtPrev = m_lastNode;
}
m_lastNode = last;
}
@@ -866,7 +869,7 @@ void LIR::Range::FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenT
assert(m_firstNode != nullptr);
assert(m_firstNode->gtPrev == nullptr);
m_firstNode->gtPrev = last;
- last->gtNext = m_firstNode;
+ last->gtNext = m_firstNode;
}
m_firstNode = first;
}
@@ -1157,7 +1160,6 @@ void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& r
Delete(compiler, block, range.m_firstNode, range.m_lastNode);
}
-
//------------------------------------------------------------------------
// LIR::Range::TryGetUse: Try to find the use for a given node.
//
@@ -1616,22 +1618,21 @@ void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range)
#if DEBUG
switch (block->bbJumpKind)
{
- case BBJ_COND:
- assert(insertionPoint->OperGet() == GT_JTRUE);
- break;
+ case BBJ_COND:
+ assert(insertionPoint->OperIsConditionalJump());
+ break;
- case BBJ_SWITCH:
- assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE));
- break;
+ case BBJ_SWITCH:
+ assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE));
+ break;
- case BBJ_RETURN:
- assert((insertionPoint->OperGet() == GT_RETURN) ||
- (insertionPoint->OperGet() == GT_JMP) ||
- (insertionPoint->OperGet() == GT_CALL));
- break;
+ case BBJ_RETURN:
+ assert((insertionPoint->OperGet() == GT_RETURN) || (insertionPoint->OperGet() == GT_JMP) ||
+ (insertionPoint->OperGet() == GT_CALL));
+ break;
- default:
- unreached();
+ default:
+ unreached();
}
#endif
}
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 19d326303e..423d72b9b2 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -76,7 +76,6 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar))
{
/* bingo - we have an x = f(x) case */
- noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT);
asgdLclVar->gtFlags |= GTF_VAR_USEDEF;
rhsUSEDEF = true;
}
@@ -699,10 +698,6 @@ void Compiler::fgPerBlockLocalVarLiveness()
}
}
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************/
-
// Helper functions to mark variables live over their entire scope
void Compiler::fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var)
@@ -1113,7 +1108,7 @@ void Compiler::fgExtendDbgLifetimes()
// Create initialization node
if (!block->IsLIR())
{
- GenTree* varNode = gtNewLclvNode(varNum, type);
+ GenTree* varNode = gtNewLclvNode(varNum, type);
GenTree* initNode = gtNewAssignNode(varNode, zero);
// Create a statement for the initializer, sequence it, and append it to the current BB.
@@ -1124,7 +1119,8 @@ void Compiler::fgExtendDbgLifetimes()
}
else
{
- GenTree* store = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET);
+ GenTree* store =
+ new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET);
store->gtOp.gtOp1 = zero;
store->gtFlags |= (GTF_VAR_DEF | GTF_ASG);
@@ -1133,7 +1129,7 @@ void Compiler::fgExtendDbgLifetimes()
#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
-#endif
+#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
// Naively inserting the initializer at the end of the block may add code after the block's
// terminator, in which case the inserted code will never be executed (and the IR for the
@@ -1184,10 +1180,6 @@ void Compiler::fgExtendDbgLifetimes()
#endif // DEBUG
}
-/*****************************************************************************/
-#endif // DEBUGGING_SUPPORT
-/*****************************************************************************/
-
VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block)
{
noway_assert(block);
@@ -1905,9 +1897,7 @@ VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
-#ifdef DEBUGGING_SUPPORT
VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); // Don't kill vars in scope
-#endif
noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
noway_assert(compCurStmt->gtOper == GT_STMT);
@@ -1955,9 +1945,7 @@ VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock
VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
-#ifdef DEBUGGING_SUPPORT
VarSetOps::UnionD(this, keepAliveVars, block->bbScope); // Don't kill vars in scope
-#endif
noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
@@ -1980,9 +1968,9 @@ VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock
else if (node->OperIsNonPhiLocal() || node->OperIsLocalAddr())
{
bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node, node);
- if (isDeadStore)
+ if (isDeadStore && fgTryRemoveDeadLIRStore(blockRange, node, &next))
{
- fgTryRemoveDeadLIRStore(blockRange, node, &next);
+ fgStmtRemoved = true;
}
}
}
@@ -2018,9 +2006,8 @@ VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
GenTreePtr gtColon = NULL;
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
-#ifdef DEBUGGING_SUPPORT
VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); /* Dont kill vars in scope */
-#endif
+
noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
noway_assert(compCurStmt->gtOper == GT_STMT);
noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
@@ -2548,10 +2535,10 @@ bool Compiler::fgRemoveDeadStore(
switch (asgNode->gtOper)
{
case GT_ASG_ADD:
- asgNode->gtOper = GT_ADD;
+ asgNode->SetOperRaw(GT_ADD);
break;
case GT_ASG_SUB:
- asgNode->gtOper = GT_SUB;
+ asgNode->SetOperRaw(GT_SUB);
break;
default:
// Only add and sub allowed, we don't have ASG_MUL and ASG_DIV for ints, and
@@ -2854,10 +2841,6 @@ void Compiler::fgInterBlockLocalVarLiveness()
fgLiveVarAnalysis();
-//-------------------------------------------------------------------------
-
-#ifdef DEBUGGING_SUPPORT
-
/* For debuggable code, we mark vars as live over their entire
* reported scope, so that it will be visible over the entire scope
*/
@@ -2867,8 +2850,6 @@ void Compiler::fgInterBlockLocalVarLiveness()
fgExtendDbgLifetimes();
}
-#endif // DEBUGGING_SUPPORT
-
/*-------------------------------------------------------------------------
* Variables involved in exception-handlers and finally blocks need
* to be specially marked
diff --git a/src/jit/loopcloning.cpp b/src/jit/loopcloning.cpp
index 8ce015e607..a1ba14292a 100644
--- a/src/jit/loopcloning.cpp
+++ b/src/jit/loopcloning.cpp
@@ -698,7 +698,7 @@ void LoopCloneContext::CondToStmtInBlock(Compiler* comp,
comp->fgInsertStmtAtEnd(block, stmt);
// Remorph.
- comp->fgMorphBlockStmt(block, stmt DEBUGARG("Loop cloning condition"));
+ comp->fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Loop cloning condition"));
}
//--------------------------------------------------------------------------------------------------
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 09eb9146ac..a6e50b304c 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -135,6 +135,15 @@ GenTree* Lowering::LowerNode(GenTree* node)
LowerCall(node);
break;
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+ case GT_EQ:
+ case GT_NE:
+ LowerCompare(node);
+ break;
+
case GT_JMP:
LowerJmpMethod(node);
break;
@@ -169,13 +178,33 @@ GenTree* Lowering::LowerNode(GenTree* node)
// produces a TYP_SIMD16 result
node->gtType = TYP_SIMD16;
}
+
+#ifdef _TARGET_XARCH_
+ if ((node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (node->gtGetOp1()->OperGet() == GT_IND))
+ {
+ // If SIMD vector is already in memory, we force its
+ // addr to be evaluated into a reg. This would allow
+ // us to generate [regBase] or [regBase+offset] or
+ // [regBase+sizeOf(SIMD vector baseType)*regIndex]
+ // to access the required SIMD vector element directly
+ // from memory.
+ //
+ // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we
+ // might be able to update GT_LEA to fold the regIndex
+ // or offset in some cases. Instead with this
+ // approach we always evaluate GT_LEA into a reg.
+ // Ideally, we should be able to lower GetItem intrinsic
+ // into GT_IND(newAddr) where newAddr combines
+ // the addr of SIMD vector with the given index.
+ node->gtOp.gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
+ }
+#endif
break;
case GT_LCL_VAR:
case GT_STORE_LCL_VAR:
if (node->TypeGet() == TYP_SIMD12)
{
-#ifdef _TARGET_64BIT_
// Assumption 1:
// RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
// to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
@@ -198,10 +227,29 @@ GenTree* Lowering::LowerNode(GenTree* node)
// Vector3 return values are returned in two return registers and the caller assembles them into a
// single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
// type args in the prolog and of the Vector3 return value of a call
+ //
+ // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments
+ // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed
+ // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear
+ // it either.
+
+ unsigned varNum = node->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = &comp->lvaTable[varNum];
+
+#if defined(_TARGET_64BIT_)
+ assert(varDsc->lvSize() == 16);
node->gtType = TYP_SIMD16;
-#else
- NYI("Lowering of TYP_SIMD12 locals");
-#endif // _TARGET_64BIT_
+#else // !_TARGET_64BIT_
+ if (varDsc->lvSize() == 16)
+ {
+ node->gtType = TYP_SIMD16;
+ }
+ else
+ {
+ // The following assert is guaranteed by lvSize().
+ assert(varDsc->lvIsParam);
+ }
+#endif // !_TARGET_64BIT_
}
#endif // FEATURE_SIMD
__fallthrough;
@@ -215,7 +263,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
#if FEATURE_MULTIREG_RET
GenTree* src = node->gtGetOp1();
assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
-#else // !FEATURE_MULTIREG_RET
+#else // !FEATURE_MULTIREG_RET
assert(!"Unexpected struct local store in Lowering");
#endif // !FEATURE_MULTIREG_RET
}
@@ -680,7 +728,7 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo
// Arguments:
// call - the call whose arg is being rewritten.
// arg - the arg being rewritten.
-// info - the ArgTabEntry information for the argument.
+// info - the fgArgTabEntry information for the argument.
// type - the type of the argument.
//
// Return Value:
@@ -692,11 +740,11 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo
//
// Notes:
// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
-// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
+// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs
// for two eightbyte structs.
//
// For STK passed structs the method generates GT_PUTARG_STK tree. For System V systems with native struct passing
-// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GP pointers count and the pointers
+// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointers count and the pointers
// layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value.
// (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.)
//
@@ -753,8 +801,8 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// In this case a new tree is created that is GT_PUTARG_REG
// with a op1 the original argument.
// 2. The struct is contained in 2 eightbytes:
- // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
- // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
// and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
assert(info->structDesc.eightByteCount != 0);
@@ -826,25 +874,25 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
//
// clang-format on
- assert(arg->OperGet() == GT_LIST);
+ assert(arg->OperGet() == GT_FIELD_LIST);
- GenTreeArgList* argListPtr = arg->AsArgList();
- assert(argListPtr->IsAggregate());
+ GenTreeFieldList* fieldListPtr = arg->AsFieldList();
+ assert(fieldListPtr->IsFieldListHead());
- for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
{
// Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
GenTreePtr newOper = comp->gtNewOperNode(
GT_PUTARG_REG,
comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
info->structDesc.eightByteSizes[ctr]),
- argListPtr->gtOp.gtOp1);
+ fieldListPtr->gtOp.gtOp1);
- // Splice in the new GT_PUTARG_REG node in the GT_LIST
- ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
+ ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
}
- // Just return arg. The GT_LIST is not replaced.
+ // Just return arg. The GT_FIELD_LIST is not replaced.
// Nothing more to do.
return arg;
}
@@ -857,26 +905,26 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
else
#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if FEATURE_MULTIREG_ARGS
- if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
+ if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
{
- assert(arg->OperGet() == GT_LIST);
+ assert(arg->OperGet() == GT_FIELD_LIST);
- GenTreeArgList* argListPtr = arg->AsArgList();
- assert(argListPtr->IsAggregate());
+ GenTreeFieldList* fieldListPtr = arg->AsFieldList();
+ assert(fieldListPtr->IsFieldListHead());
- for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
{
- GenTreePtr curOp = argListPtr->gtOp.gtOp1;
+ GenTreePtr curOp = fieldListPtr->gtOp.gtOp1;
var_types curTyp = curOp->TypeGet();
// Create a new GT_PUTARG_REG node with op1
GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
- // Splice in the new GT_PUTARG_REG node in the GT_LIST
- ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
+ ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
}
- // Just return arg. The GT_LIST is not replaced.
+ // Just return arg. The GT_FIELD_LIST is not replaced.
// Nothing more to do.
return arg;
}
@@ -893,23 +941,20 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// This provides the info to put this argument in the incoming arg area slot
// instead of in the out-going arg area slot.
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
- // correct
+ PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
+ // correct
#if FEATURE_FASTTAILCALL
putArg = new (comp, GT_PUTARG_STK)
- GenTreePutArgStk(GT_PUTARG_STK, type, arg,
- info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct),
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
call->IsFastTailCall() DEBUGARG(call));
#else
putArg = new (comp, GT_PUTARG_STK)
GenTreePutArgStk(GT_PUTARG_STK, type, arg,
- info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call));
+ info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
// This is later used in the codegen for PUT_ARG_STK implementation
@@ -919,8 +964,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// pair copying using XMM registers or rep mov instructions.
if (info->isStruct)
{
- unsigned numRefs = 0;
- BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
// We use GT_OBJ for non-SIMD struct arguments. However, for
// SIMD arguments the GT_OBJ has already been transformed.
if (arg->gtOper != GT_OBJ)
@@ -929,13 +972,14 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
}
else
{
+ unsigned numRefs = 0;
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
assert(!varTypeIsSIMD(arg));
numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
}
-
- putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
}
if (arg->InReg())
@@ -1011,6 +1055,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
type = TYP_INT;
}
+#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+ // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their
+ // allocated size (see lvSize()). However, when passing the variables as arguments, and
+ // storing the variables to the outgoing argument area on the stack, we must use their
+ // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written.
+ if (type == TYP_SIMD16)
+ {
+ if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR))
+ {
+ unsigned varNum = arg->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = &comp->lvaTable[varNum];
+ type = varDsc->lvType;
+ }
+ }
+#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+
GenTreePtr putArg;
// If we hit this we are probably double-lowering.
@@ -1068,7 +1128,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
putArg = NewPutArg(call, arg, info, type);
// In the case of register passable struct (in one or two registers)
- // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.)
+ // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.)
// If an extra node is returned, splice it in the right place in the tree.
if (arg != putArg)
{
@@ -1367,6 +1427,7 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
// Inserts profiler hook, GT_PROF_HOOK for a tail call node.
//
+// AMD64:
// We need to insert this after all nested calls, but before all the arguments to this call have been set up.
// To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
// that. If there are no args, then it should be inserted before the call node.
@@ -1391,16 +1452,30 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
// In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
//
+// X86:
+// Insert the profiler hook immediately before the call. The profiler hook will preserve
+// all argument registers (ECX, EDX), but nothing else.
+//
// Params:
// callNode - tail call node
-// insertionPoint - if caller has an insertion point; If null
-// profiler hook is inserted before args are setup
+// insertionPoint - if non-null, insert the profiler hook before this point.
+// If null, insert the profiler hook before args are set up
// but after all arg side effects are computed.
+//
void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
{
assert(call->IsTailCall());
assert(comp->compIsProfilerHookNeeded());
+#if defined(_TARGET_X86_)
+
+ if (insertionPoint == nullptr)
+ {
+ insertionPoint = call;
+ }
+
+#else // !defined(_TARGET_X86_)
+
if (insertionPoint == nullptr)
{
GenTreePtr tmp = nullptr;
@@ -1437,6 +1512,8 @@ void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint
}
}
+#endif // !defined(_TARGET_X86_)
+
assert(insertionPoint != nullptr);
GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
BlockRange().InsertBefore(insertionPoint, profHookNode);
@@ -1705,7 +1782,10 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
- assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+#ifdef _TARGET_AMD64_
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+#endif // _TARGET_AMD64_
// We expect to see a call that meets the following conditions
assert(call->IsTailCallViaHelper());
@@ -1713,8 +1793,9 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
// The TailCall helper call never returns to the caller and is not GC interruptible.
// Therefore the block containing the tail call should be a GC safe point to avoid
- // GC starvation.
- assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+ // GC starvation. It is legal for the block to be unmarked iff the entry block is a
+ // GC safe point, as the entry block trivially dominates every reachable block.
+ assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT));
// If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
// a method returns. This is a case of caller method has both PInvokes and tail calls.
@@ -1839,16 +1920,268 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
// Now add back tail call flags for identifying this node as tail call dispatched via helper.
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+#ifdef PROFILING_SUPPORTED
// Insert profiler tail call hook if needed.
// Since we don't know the insertion point, pass null for second param.
if (comp->compIsProfilerHookNeeded())
{
InsertProfTailCallHook(call, nullptr);
}
+#endif // PROFILING_SUPPORTED
+
+ assert(call->IsTailCallViaHelper());
return result;
}
+//------------------------------------------------------------------------
+// Lowering::LowerCompare: lowers a compare node.
+//
+// For 64-bit targets, this doesn't do much of anything: all comparisons
+// that we support can be handled in code generation on such targets.
+//
+// For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
+// node must be lowered such that the liveness of the operands to the
+// comparison is properly visible to the rest of the backend. As such,
+// a 64-bit comparison is lowered from something like this:
+//
+// ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
+// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+//
+// /--* t6 int
+// N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+//
+// N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
+//
+// /--* t7 long
+// +--* t9 long
+// N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
+//
+// /--* t10 int
+// N005 ( 11, 19) [000011] ------------ * jmpTrue void
+//
+// To something like this:
+//
+// ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
+// [000099] ------------ t99 = const int 0
+//
+// [000101] ------------ t101 = const int 0
+//
+// /--* t99 int
+// +--* t101 int
+// N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
+//
+// /--* t10 int
+// N005 ( 11, 19) [000011] ------------ * jmpTrue void
+//
+//
+// ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
+// [000105] -------N-U-- jcc void cond=<
+//
+//
+// ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
+// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+//
+// N003 ( 3, 10) [000009] ------------ t9 = const int 3
+//
+// /--* t6 int
+// +--* t9 int
+// [000106] N------N-U-- t106 = * < int
+//
+// /--* t106 int
+// [000107] ------------ * jmpTrue void
+//
+// Which will eventually generate code similar to the following:
+//
+// 33DB xor ebx, ebx
+// 85DB test ebx, ebx
+// 7707 ja SHORT G_M50523_IG04
+// 72E7 jb SHORT G_M50523_IG03
+// 83F803 cmp eax, 3
+// 72E2 jb SHORT G_M50523_IG03
+//
+void Lowering::LowerCompare(GenTree* cmp)
+{
+#ifndef _TARGET_64BIT_
+ if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
+ {
+ return;
+ }
+
+ LIR::Use cmpUse;
+
+ if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+ {
+ return;
+ }
+
+ GenTree* src1 = cmp->gtGetOp1();
+ GenTree* src2 = cmp->gtGetOp2();
+ unsigned weight = m_block->getBBWeight(comp);
+
+ LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+ LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+
+ if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
+ {
+ loSrc1.ReplaceWithLclVar(comp, weight);
+ }
+
+ if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
+ {
+ loSrc2.ReplaceWithLclVar(comp, weight);
+ }
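+ // The lo-half operands are left as constants or spilled to locals here: their defs are removed
+ // from this block further down and re-inserted into the split-off blocks, so they must be simple
+ // nodes that can be moved safely.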
+
+ BasicBlock* jumpDest = m_block->bbJumpDest;
+ BasicBlock* nextDest = m_block->bbNext;
+ BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+
+ cmp->gtType = TYP_INT;
+ cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
+ cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
+
+ if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
+ {
+ // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
+ // bits and one for the lower 32 bits. As such, we update the flow graph like so:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: cond(hi)
+ // / \
+ // false true
+ // | |
+ // | BB3: cond(lo)
+ // | / \
+ // | false true
+ // \ / |
+ // BB1 BB2
+ //
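+ // Illustratively, for jtrue(x == y) this produces: BB0 jumps to the false successor when
+ // hi(x) != hi(y), and the new BB3 jumps to the true successor when lo(x) == lo(y), falling
+ // through to the false successor otherwise.
+ //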
+
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+
+ m_block->bbJumpKind = BBJ_COND;
+
+ if (cmp->OperGet() == GT_EQ)
+ {
+ cmp->gtOper = GT_NE;
+ m_block->bbJumpDest = nextDest;
+ nextDest->bbFlags |= BBF_JMP_TARGET;
+ comp->fgAddRefPred(nextDest, m_block);
+ }
+ else
+ {
+ m_block->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, m_block);
+ }
+
+ assert(newBlock->bbJumpKind == BBJ_COND);
+ assert(newBlock->bbJumpDest == jumpDest);
+ }
+ else
+ {
+ // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
+ // comparison for the lower 32 bits. We update the flowgraph as such:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: (!cond(hi) && !eq(hi))
+ // / \
+ // true false
+ // | |
+ // | BB3: (cond(hi) && !eq(hi))
+ // | / \
+ // | false true
+ // | | |
+ // | BB4: cond(lo) |
+ // | / \ |
+ // | false true |
+ // \ / \ /
+ // BB1 BB2
+ //
+ //
+ // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
+ // differ based on the original condition, and all consist of a single node. The switch statement below
+ // performs the necessary mapping.
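+ //
+ // As a concrete illustration, for a signed jtrue(x < y): BB0 jumps to the false successor when
+ // hi(x) > hi(y); BB3 reuses the flags of that compare via GT_JCC and jumps to the true successor
+ // when hi(x) < hi(y); BB4 then performs an unsigned lo(x) < lo(y) compare.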
+ //
+
+ genTreeOps hiCmpOper;
+ genTreeOps loCmpOper;
+
+ switch (cmp->OperGet())
+ {
+ case GT_LT:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LT;
+ break;
+ case GT_LE:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LE;
+ break;
+ case GT_GT:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GT;
+ break;
+ case GT_GE:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GE;
+ break;
+ default:
+ unreached();
+ }
+
+ BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+
+ GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
+ hiJcc->gtFlags = cmp->gtFlags;
+ LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
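+ // The lo halves are compared as unsigned: only the hi halves carry the sign of the full 64-bit values.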
+ loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+
+ m_block->bbJumpKind = BBJ_COND;
+ m_block->bbJumpDest = nextDest;
+ nextDest->bbFlags |= BBF_JMP_TARGET;
+ comp->fgAddRefPred(nextDest, m_block);
+
+ newBlock->bbJumpKind = BBJ_COND;
+ newBlock->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, newBlock);
+
+ assert(newBlock2->bbJumpKind == BBJ_COND);
+ assert(newBlock2->bbJumpDest == jumpDest);
+ }
+
+ BlockRange().Remove(src1);
+ BlockRange().Remove(src2);
+#endif
+}
+
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
void Lowering::LowerJmpMethod(GenTree* jmp)
{
@@ -2334,8 +2667,12 @@ void Lowering::InsertPInvokeMethodProlog()
DISPTREERANGE(firstBlockRange, storeFP);
// --------------------------------------------------------
+ // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
+ // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame.
+ CLANG_FORMAT_COMMENT_ANCHOR;
- if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+#ifdef _TARGET_64BIT_
+ if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
// Push a frame - if we are NOT in an IL stub, this is done right before the call
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack
@@ -2343,6 +2680,7 @@ void Lowering::InsertPInvokeMethodProlog()
firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
DISPTREERANGE(firstBlockRange, frameUpd);
}
+#endif // _TARGET_64BIT_
}
//------------------------------------------------------------------------
@@ -2405,9 +2743,14 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt
GenTree* storeGCState = SetGCState(1);
returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
- if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do
+ // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
+#endif // _TARGET_64BIT_
{
- // Pop the frame, in non-stubs we do this around each PInvoke call
GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
}
@@ -2454,6 +2797,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
comp->fgMorphTree(helperCall);
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
+ LowerNode(helperCall); // helper call is inserted before current node and should be lowered here.
return;
}
#endif
@@ -2464,7 +2808,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
// InlinedCallFrame.m_pCallSiteSP = SP // x86 only
// InlinedCallFrame.m_pCallerReturnAddress = return address
// Thread.gcState = 0
- // (non-stub) - update top Frame on TCB
+ // (non-stub) - update top Frame on TCB // 64-bit targets only
// ----------------------------------------------------------------------------------
// Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
@@ -2474,11 +2818,19 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
if (callType == CT_INDIRECT)
{
+#if !defined(_TARGET_64BIT_)
+ // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum.
+ const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE;
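+ // (For example, an indirect call that uses three outgoing stack slots stores 3 * 4 = 12 here on x86.)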
+
+ src = comp->gtNewIconNode(numStkArgBytes, TYP_INT);
+#else
+ // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum.
+ // If the stub parameter value is not needed, m_Datum will be initialized by the VM.
if (comp->info.compPublishStubParam)
{
- src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET);
+ src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL);
}
- // else { If we don't have secret parameter, m_Datum will be initialized by VM code }
+#endif // !defined(_TARGET_64BIT_)
}
else
{
@@ -2542,7 +2894,12 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
- if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method
+ // contains PInvokes; on 64-bit targets this is necessary in non-stubs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
// Set the TCB's frame to be the one we just created.
// Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
@@ -2552,6 +2909,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
}
+#endif // _TARGET_64BIT_
// IMPORTANT **** This instruction must come last!!! ****
// It changes the thread's state to Preemptive mode
@@ -2583,7 +2941,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
// First argument is the address of the frame variable.
GenTree* frameAddr =
new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
- frameAddr->gtOper = GT_LCL_VAR_ADDR;
+ frameAddr->SetOperRaw(GT_LCL_VAR_ADDR);
// Insert call to CORINFO_HELP_JIT_PINVOKE_END
GenTree* helperCall =
@@ -2604,12 +2962,32 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
tree = CreateReturnTrapSeq();
BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
- // Pop the frame if necessasry
- if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this
+ // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
{
tree = CreateFrameLinkUpdate(PopFrame);
BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
}
+#else
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallerReturnAddress = nullptr
+
+ GenTreeLclFld* const storeCallSiteTracker =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfReturnAddress);
+
+ GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+
+ storeCallSiteTracker->gtOp1 = constantZero;
+
+ BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker);
+#endif // _TARGET_64BIT_
}
//------------------------------------------------------------------------
@@ -2624,7 +3002,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
{
// PInvoke lowering varies depending on the flags passed in by the EE. By default,
- // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified,
+ // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified,
// GC transitions are instead performed using helper calls. Examples of each case are given
// below. Note that the data structure that is used to store information about a call frame
// containing any P/Invoke calls is initialized in the method prolog (see
@@ -2697,7 +3075,7 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
#if COR_JIT_EE_VERSION > 460
comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
#else
- void* pIndirection;
+ void* pIndirection;
lookup.accessType = IAT_PVALUE;
lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
if (lookup.addr == nullptr)
@@ -2866,14 +3244,10 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
}
#endif
- // TODO-Cleanup: Disable emitting random NOPs
-
// This is code to set up an indirect call to a stub address computed
// via dictionary lookup.
if (call->gtCallType == CT_INDIRECT)
{
- NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
-
// The importer decided we needed a stub call via a computed
// stub dispatch address, i.e. an address which came from a dictionary lookup.
// - The dictionary lookup produces an indirected address, suitable for call
@@ -2886,6 +3260,8 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
// All we have to do here is add an indirection to generate the actual call target.
GenTree* ind = Ind(call->gtCallAddr);
+ ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
+
BlockRange().InsertAfter(call->gtCallAddr, ind);
call->gtCallAddr = ind;
}
@@ -2923,8 +3299,10 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
// So we don't use a register.
#ifndef _TARGET_X86_
// on x64 we must materialize the target using specific registers.
- addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+ addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+
indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+ indir->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
#endif
result = indir;
}
@@ -3042,8 +3420,6 @@ bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, Ge
return true;
}
}
-
- unreached();
}
//------------------------------------------------------------------------
@@ -3082,9 +3458,9 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
{
// We can have an indirection on the rhs of a block copy (it is the source
// object). This is not a "regular" indirection.
- // (Note that the parent check could be costly.)
- GenTree* parent = indir->gtGetParent(nullptr);
- if ((parent != nullptr) && parent->OperIsIndir())
+ // (Note that the user check could be costly.)
+ LIR::Use indirUse;
+ if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir())
{
isIndir = false;
}
@@ -3248,9 +3624,14 @@ void Lowering::LowerUnsignedDivOrMod(GenTree* node)
{
assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
- GenTree* divisor = node->gtGetOp2();
+ GenTree* divisor = node->gtGetOp2();
+ GenTree* dividend = node->gtGetOp1();
- if (divisor->IsCnsIntOrI())
+ if (divisor->IsCnsIntOrI()
+#ifdef _TARGET_X86_
+ && (dividend->OperGet() != GT_LONG)
+#endif
+ )
{
size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
@@ -3276,6 +3657,91 @@ void Lowering::LowerUnsignedDivOrMod(GenTree* node)
}
//------------------------------------------------------------------------
+// GetSignedMagicNumberForDivide: Generates a magic number and shift amount for
+// the magic number division optimization.
+//
+// Arguments:
+// denom - The denominator
+// shift - Pointer to the shift value to be returned
+//
+// Returns:
+// The magic number.
+//
+// Notes:
+// This code was previously in UTC, where it notes that it was taken from
+// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58. The paper it is based on
+// is "Division by invariant integers using multiplication" by Torbjorn Granlund
+// and Peter L. Montgomery, PLDI 1994.
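+//
+// As a quick worked example (a sketch using the standard two's-complement constants): a 32-bit
+// divide by 7 is expected to yield magic = 0x92492493 (negative) and *shift = 2, which
+// LowerSignedDivOrMod below turns into a GT_MULHI plus add/shift/sign-bit adjustments.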
+
+template <typename T>
+T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/)
+{
+ // static SMAG smag;
+ const int bits = sizeof(T) * 8;
+ const int bits_minus_1 = bits - 1;
+
+ typedef typename jitstd::make_unsigned<T>::type UT;
+
+ const UT two_nminus1 = UT(1) << bits_minus_1;
+
+ int p;
+ UT absDenom;
+ UT absNc;
+ UT delta;
+ UT q1;
+ UT r1;
+ UT r2;
+ UT q2;
+ UT t;
+ T result_magic;
+ int result_shift;
+ int iters = 0;
+
+ absDenom = abs(denom);
+ t = two_nminus1 + ((unsigned int)denom >> 31);
+ absNc = t - 1 - (t % absDenom); // absolute value of nc
+ p = bits_minus_1; // initialize p
+ q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
+ r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
+ q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom)
+ r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom))
+
+ do
+ {
+ iters++;
+ p++;
+ q1 *= 2; // update q1 = 2^p / abs(nc)
+ r1 *= 2; // update r1 = rem(2^p / abs(nc))
+
+ if (r1 >= absNc)
+ { // must be unsigned comparison
+ q1++;
+ r1 -= absNc;
+ }
+
+ q2 *= 2; // update q2 = 2^p / abs(denom)
+ r2 *= 2; // update r2 = rem(2^p / abs(denom))
+
+ if (r2 >= absDenom)
+ { // must be unsigned comparison
+ q2++;
+ r2 -= absDenom;
+ }
+
+ delta = absDenom - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ result_magic = q2 + 1; // resulting magic number
+ if (denom < 0)
+ {
+ result_magic = -result_magic;
+ }
+ *shift = p - bits; // resulting shift
+
+ return result_magic;
+}
+
+//------------------------------------------------------------------------
// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
// const divisor into equivalent but faster sequences.
//
@@ -3313,8 +3779,10 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
ssize_t divisorValue = divisor->gtIntCon.IconValue();
- if (divisorValue == -1)
+ if (divisorValue == -1 || divisorValue == 0)
{
+ // x / 0 and x % 0 can't be optimized because they are required to throw an exception.
+
// x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
// x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
@@ -3343,14 +3811,122 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
if (!isPow2(absDivisorValue))
{
+#ifdef _TARGET_XARCH_
+ ssize_t magic;
+ int shift;
+
+ if (type == TYP_INT)
+ {
+ magic = GetSignedMagicNumberForDivide<int32_t>(static_cast<int32_t>(divisorValue), &shift);
+ }
+ else
+ {
+#ifdef _TARGET_64BIT_
+ magic = GetSignedMagicNumberForDivide<int64_t>(static_cast<int64_t>(divisorValue), &shift);
+#else
+ unreached();
+#endif
+ }
+
+ divisor->gtIntConCommon.SetIconValue(magic);
+
+ // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node.
+ // The existing node will later be transformed into a GT_ADD/GT_SUB that
+ // computes the final result. This way we don't need to find and change the
+ // use of the existing node.
+ GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend);
+ BlockRange().InsertBefore(divMod, mulhi);
+
+ // mulhi was the easy part. Now we need to generate different code depending
+ // on the divisor value:
+ // For 3 we need:
+ // div = signbit(mulhi) + mulhi
+ // For 5 we need:
+ // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
+ // For 7 we need:
+ // mulhi += dividend ; requires add adjust
+ // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust
+ // For -3 we need:
+ // mulhi -= dividend ; requires sub adjust
+ // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
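+ // For instance (illustrative only), for divisorValue == 3 the magic number is expected to be
+ // 0x55555556: it has the same sign as the divisor and a zero shift, so neither adjust is
+ // needed and the quotient is simply signbit(mulhi) + mulhi.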
+ bool requiresAddSubAdjust = signum(divisorValue) != signum(magic);
+ bool requiresShiftAdjust = shift != 0;
+ bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
+ unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
+ unsigned dividendLclNum = BAD_VAR_NUM;
+
+ if (requiresDividendMultiuse)
+ {
+ LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi);
+ dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight);
+ }
+
+ GenTree* adjusted;
+
+ if (requiresAddSubAdjust)
+ {
+ dividend = comp->gtNewLclvNode(dividendLclNum, type);
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ adjusted = comp->gtNewOperNode(divisorValue > 0 ? GT_ADD : GT_SUB, type, mulhi, dividend);
+ BlockRange().InsertBefore(divMod, dividend, adjusted);
+ }
+ else
+ {
+ adjusted = mulhi;
+ }
+
+ GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type);
+ GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy);
+ BlockRange().InsertBefore(divMod, shiftBy, signBit);
+
+ LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit);
+ unsigned adjustedLclNum = adjustedUse.ReplaceWithLclVar(comp, curBBWeight);
+ adjusted = comp->gtNewLclvNode(adjustedLclNum, type);
+ comp->lvaTable[adjustedLclNum].incRefCnts(curBBWeight, comp);
+ BlockRange().InsertBefore(divMod, adjusted);
+
+ if (requiresShiftAdjust)
+ {
+ shiftBy = comp->gtNewIconNode(shift, TYP_INT);
+ adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy);
+ BlockRange().InsertBefore(divMod, shiftBy, adjusted);
+ }
+
+ if (isDiv)
+ {
+ divMod->SetOperRaw(GT_ADD);
+ divMod->gtOp.gtOp1 = adjusted;
+ divMod->gtOp.gtOp2 = signBit;
+ }
+ else
+ {
+ GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit);
+
+ dividend = comp->gtNewLclvNode(dividendLclNum, type);
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ // dividend % divisor = dividend - divisor * div
+ GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
+ GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
+ BlockRange().InsertBefore(divMod, dividend, div, divisor, mul);
+
+ divMod->SetOperRaw(GT_SUB);
+ divMod->gtOp.gtOp1 = dividend;
+ divMod->gtOp.gtOp2 = mul;
+ }
+
+ return mulhi;
+#else
+ // Currently there's no GT_MULHI for ARM32/64
return next;
+#endif
}
- // We're committed to the conversion now. Go find the use.
+ // We're committed to the conversion now. Go find the use if any.
LIR::Use use;
if (!BlockRange().TryGetUse(node, &use))
{
- assert(!"signed DIV/MOD node is unused");
return next;
}
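To see how the pieces fit together, here is a standalone sketch (an illustration only, not JIT code) of the divide-by-7 case from the comment above. The magic constant and shift are the standard Hacker's Delight values for a divisor of 7, an assumption of this example; the JIT obtains them from GetSignedMagicNumberForDivide. The add adjust, the logical shift that extracts the sign bit, the arithmetic shift, and the final add correspond to the GT_ADD, GT_RSZ, GT_RSH and GT_ADD nodes built above.

    #include <cassert>
    #include <cstdint>

    static int32_t DivBy7(int32_t n)
    {
        const int32_t magic = -1840700269; // 0x92492493 as a two's-complement value, (2^34 + 5) / 7
        const int     shift = 2;

        // GT_MULHI: high 32 bits of the 64-bit signed product.
        int32_t mulhi = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);
        // Add adjust, needed because signum(7) != signum(magic).
        int32_t adjusted = mulhi + n;
        // GT_RSZ by 31: the sign bit of the adjusted value, as 0 or 1.
        int32_t signBit = static_cast<int32_t>(static_cast<uint32_t>(adjusted) >> 31);
        // GT_RSH by 'shift', then GT_ADD of the sign bit, gives the truncated quotient.
        return (adjusted >> shift) + signBit;
    }

    int main()
    {
        for (int32_t n = -1000; n <= 1000; n++)
        {
            assert(DivBy7(n) == n / 7);
            // The GT_MOD path then uses: dividend % divisor = dividend - divisor x div.
            assert(n - 7 * DivBy7(n) == n % 7);
        }
        return 0;
    }

(The right shifts of negative values rely on arithmetic-shift behavior, which the compilers this sketch targets all provide.)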
@@ -3450,8 +4026,6 @@ void Lowering::LowerStoreInd(GenTree* node)
void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
GenTree* src = blkNode->Data();
- // TODO-1stClassStructs: Don't require this.
- assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal());
TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
}
@@ -3817,17 +4391,17 @@ void Lowering::CheckCallArg(GenTree* arg)
break;
#endif
- case GT_LIST:
- {
- GenTreeArgList* list = arg->AsArgList();
- assert(list->IsAggregate());
+ case GT_FIELD_LIST:
+ {
+ GenTreeFieldList* list = arg->AsFieldList();
+ assert(list->IsFieldListHead());
- for (; list != nullptr; list = list->Rest())
- {
- assert(list->Current()->OperIsPutArg());
- }
+ for (; list != nullptr; list = list->Rest())
+ {
+ assert(list->Current()->OperIsPutArg());
}
- break;
+ }
+ break;
default:
assert(arg->OperIsPutArg());
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 620636d8bd..c1cafb4ee8 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -65,6 +65,7 @@ private:
// Call Lowering
// ------------------------------
void LowerCall(GenTree* call);
+ void LowerCompare(GenTree* tree);
void LowerJmpMethod(GenTree* jmp);
void LowerRet(GenTree* ret);
GenTree* LowerDelegateInvoke(GenTreeCall* call);
@@ -127,8 +128,14 @@ private:
// return true if this call target is within range of a pc-rel call on the machine
bool IsCallTargetInRange(void* addr);
+#ifdef _TARGET_X86_
+ bool ExcludeNonByteableRegisters(GenTree* tree);
+#endif
+
void TreeNodeInfoInit(GenTree* stmt);
+ void TreeNodeInfoInitCheckByteable(GenTree* tree);
+
#if defined(_TARGET_XARCH_)
void TreeNodeInfoInitSimple(GenTree* tree);
@@ -190,6 +197,7 @@ private:
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
void TreeNodeInfoInitCall(GenTreeCall* call);
+ void TreeNodeInfoInitCmp(GenTreePtr tree);
void TreeNodeInfoInitStructArg(GenTreePtr structArg);
void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode);
void TreeNodeInfoInitLogicalOp(GenTree* tree);
@@ -200,11 +208,11 @@ private:
#endif // FEATURE_SIMD
void TreeNodeInfoInitCast(GenTree* tree);
#ifdef _TARGET_ARM64_
- void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info);
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
#endif // _TARGET_ARM64_
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- void TreeNodeInfoInitPutArgStk(GenTree* tree);
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree);
+#endif // FEATURE_PUT_STRUCT_ARG_STK
void TreeNodeInfoInitLclHeap(GenTree* tree);
void DumpNodeInfoMap();
@@ -226,8 +234,6 @@ private:
void SetMulOpCounts(GenTreePtr tree);
#endif // defined(_TARGET_XARCH_)
- void LowerCmp(GenTreePtr tree);
-
#if !CPU_LOAD_STORE_ARCH
bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
bool IsBinOpInRMWStoreInd(GenTreePtr tree);
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
index 67cea2ff4e..5bf23c4199 100644
--- a/src/jit/lowerarm.cpp
+++ b/src/jit/lowerarm.cpp
@@ -32,10 +32,76 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
#include "lsra.h"
-/* Lowering of GT_CAST nodes */
+//------------------------------------------------------------------------
+// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
+//
+// Arguments:
+// tree - GT_CAST node to be lowered
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Casts from small int type to float/double are transformed as follows:
+// GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+// GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+// GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+// GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+//
+// Similarly casts from float/double to a smaller int type are transformed as follows:
+// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ // GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ // GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+//
+// Note that for the overflow conversions we still depend on helper calls and
+// don't expect to see them here.
+// i) GT_CAST(float/double, int type with overflow detection)
+
void Lowering::LowerCast(GenTree* tree)
{
- NYI_ARM("ARM Lowering for cast");
+ assert(tree->OperGet() == GT_CAST);
+
+ JITDUMP("LowerCast for: ");
+ DISPNODE(tree);
+ JITDUMP("\n");
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+
+ // TODO-ARM-Cleanup: Remove following NYI assertions.
+ if (varTypeIsFloating(srcType))
+ {
+ NYI_ARM("Lowering for cast from float"); // Not tested yet.
+ noway_assert(!tree->gtOverflow());
+ }
+
+ // Case of src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ NYI_ARM("Lowering for cast from small type to float"); // Not tested yet.
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // Case of src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
}
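The two-step form is legal because every small-int value is exactly representable as an int32, and in the float-to-small-int direction both forms truncate toward zero, so inserting the intermediate cast cannot change the result when the value fits the destination type. A minimal check, for illustration only:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        for (int v = -128; v <= 127; v++)
        {
            int8_t b = static_cast<int8_t>(v);
            // GT_CAST(sbyte, float) == GT_CAST(GT_CAST(sbyte, int32), float)
            assert(static_cast<float>(b) == static_cast<float>(static_cast<int32_t>(b)));

            float f = static_cast<float>(v) + 0.25f;
            // GT_CAST(float, sbyte) == GT_CAST(GT_CAST(float, int32), sbyte)
            assert(static_cast<int8_t>(f) == static_cast<int8_t>(static_cast<int32_t>(f)));
        }
        return 0;
    }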
void Lowering::LowerRotate(GenTreePtr tree)
@@ -62,7 +128,73 @@ bool Lowering::IsCallTargetInRange(void* addr)
// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
- NYI_ARM("ARM IsContainableImmed");
+ if (varTypeIsFloating(parentNode->TypeGet()))
+ {
+ // TODO-ARM-Cleanup: not tested yet.
+ NYI_ARM("ARM IsContainableImmed for floating point type");
+
+ // We can contain a floating point 0.0 constant in a compare instruction
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (childNode->IsIntegralConst(0))
+ return true;
+ break;
+ }
+ }
+ else
+ {
+ // Make sure we have an actual immediate
+ if (!childNode->IsCnsIntOrI())
+ return false;
+ if (childNode->IsIconHandle() && comp->opts.compReloc)
+ return false;
+
+ ssize_t immVal = childNode->gtIntCon.gtIconVal;
+ emitAttr attr = emitActualTypeSize(childNode->TypeGet());
+ emitAttr size = EA_SIZE(attr);
+
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (emitter::emitIns_valid_imm_for_add(immVal, INS_FLAGS_DONT_CARE))
+ return true;
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ if (emitter::emitIns_valid_imm_for_alu(immVal))
+ return true;
+ break;
+
+ case GT_STORE_LCL_VAR:
+ // TODO-ARM-Cleanup: not tested yet
+ NYI_ARM("ARM IsContainableImmed for GT_STORE_LCL_VAR");
+ if (immVal == 0)
+ return true;
+ break;
+ }
+ }
+
return false;
}
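emitIns_valid_imm_for_alu is the emitter's own check; as a rough mental model (an approximation, not the actual helper, and specific to the classic A32 encoding rather than Thumb-2), an ARM data-processing immediate is an 8-bit constant rotated right by an even amount:

    #include <cstdint>

    // Rough model: can 'value' be encoded as imm8 rotated right by an even amount?
    static bool IsArm32AluImmediate(uint32_t value)
    {
        for (unsigned rot = 0; rot < 32; rot += 2)
        {
            // Rotating left by 'rot' undoes the encoding's rotate-right.
            uint32_t undone = (rot == 0) ? value : ((value << rot) | (value >> (32u - rot)));
            if (undone <= 0xFFu)
            {
                return true;
            }
        }
        return false;
    }

Under this model 0xFF000000 is containable (0xFF rotated right by 8), while 0x101 is not, since no eight-bit window covers both of its set bits under any even rotation.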
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index 1720c62acb..cc9e2266d2 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -126,6 +126,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfo* info = &(tree->gtLsraInfo);
RegisterType registerType = TypeGet(tree);
+ JITDUMP("TreeNodeInfoInit for: ");
+ DISPNODE(tree);
+ JITDUMP("\n");
+
switch (tree->OperGet())
{
GenTree* op1;
@@ -202,6 +206,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
__fallthrough;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
case GT_NO_OP:
case GT_START_NONGC:
@@ -485,7 +490,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_LE:
case GT_GE:
case GT_GT:
- LowerCmp(tree);
+ TreeNodeInfoInitCmp(tree);
break;
case GT_CKFINITE:
@@ -524,12 +529,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_BLK:
- case GT_OBJ:
case GT_DYN_BLK:
// These should all be eliminated prior to Lowering.
assert(!"Non-store block node in Lowering");
info->srcCount = 0;
info->dstCount = 0;
+ break;
case GT_STORE_BLK:
case GT_STORE_OBJ:
@@ -537,6 +542,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitBlockStore(tree->AsBlk());
break;
+ case GT_INIT_VAL:
+ // Always a passthrough of its child's value.
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
case GT_LCLHEAP:
{
info->srcCount = 1;
@@ -977,7 +988,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
@@ -989,7 +1000,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// late arg that is not passed in a register
assert(argNode->gtOper == GT_PUTARG_STK);
- TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry);
+ TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
continue;
}
@@ -1003,16 +1014,16 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
argNode = argNode->gtEffectiveVal();
- // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct
- if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST))
+ // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
{
GenTreePtr actualArgNode = argNode;
unsigned originalSize = 0;
- if (argNode->gtOper == GT_LIST)
+ if (argNode->gtOper == GT_FIELD_LIST)
{
// There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- GenTreeArgList* argListPtr = argNode->AsArgList();
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
// Initialize the first register and the first regmask in our list
regNumber targetReg = argReg;
@@ -1020,9 +1031,9 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
unsigned iterationNum = 0;
originalSize = 0;
- for (; argListPtr; argListPtr = argListPtr->Rest())
+ for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
{
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ GenTreePtr putArgRegNode = fieldListPtr->Current();
assert(putArgRegNode->gtOper == GT_PUTARG_REG);
GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
@@ -1115,7 +1126,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
assert(curArgTabEntry->regNum == REG_STK);
- TreeNodeInfoInitPutArgStk(arg, curArgTabEntry);
+ TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
}
else
{
@@ -1154,7 +1165,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Notes:
// Set the child node(s) to be contained when we have a multireg arg
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info)
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
{
assert(argNode->gtOper == GT_PUTARG_STK);
@@ -1166,14 +1177,14 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info
argNode->gtLsraInfo.srcCount = 1;
argNode->gtLsraInfo.dstCount = 0;
- // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a multireg pass-by-value struct
- if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST))
+ // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
{
// We will use store instructions that each write a register sized value
- if (putArgChild->OperGet() == GT_LIST)
+ if (putArgChild->OperGet() == GT_FIELD_LIST)
{
- // We consume all of the items in the GT_LIST
+ // We consume all of the items in the GT_FIELD_LIST
argNode->gtLsraInfo.srcCount = info->numSlots;
}
else
@@ -1219,8 +1230,9 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info
void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
LinearScan* l = m_lsra;
Compiler* compiler = comp;
@@ -1228,16 +1240,44 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// We may require an additional source or temp register for the size.
blkNode->gtLsraInfo.srcCount = 2;
blkNode->gtLsraInfo.dstCount = 0;
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
- if ((blkNode->OperGet() == GT_STORE_OBJ) && (blkNode->AsObj()->gtGcPtrCount == 0))
+ if (!isInitBlk)
{
- blkNode->SetOper(GT_STORE_BLK);
+ // CopyObj or CopyBlk
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
}
- if (blkNode->OperIsInitBlkOp())
+ if (isInitBlk)
{
- unsigned size = blkNode->gtBlkSize;
- GenTreePtr initVal = blkNode->Data();
+ GenTreePtr initVal = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
#if 0
// TODO-ARM64-CQ: Currently we generate a helper call for every
@@ -1264,8 +1304,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtType = TYP_LONG;
}
- MakeSrcContained(tree, blockSize);
-
// In case we have a buffer >= 16 bytes
// we can use SSE2 to do a 128-bit store in a single
// instruction.
@@ -1282,7 +1320,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
else
#endif // 0
{
- // The helper follows the regular AMD64 ABI.
+ // The helper follows the regular ABI.
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
@@ -1306,34 +1344,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
{
// CopyObj or CopyBlk
// Sources are src and dest and size if not constant.
- unsigned size = blkNode->gtBlkSize;
- GenTreePtr source = blkNode->Data();
- GenTree* srcAddr = nullptr;
- if (source->gtOper == GT_IND)
- {
- srcAddr = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddr->gtLsraInfo.getDstCount() == 0)
- {
- srcAddr->gtLsraInfo.setDstCount(1);
- srcAddr->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
- }
if (blkNode->OperGet() == GT_STORE_OBJ)
{
// CopyObj
GenTreeObj* objNode = blkNode->AsObj();
- GenTreePtr source = objNode->Data();
unsigned slots = objNode->gtSlots;
@@ -1362,16 +1378,19 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
blkNode->gtLsraInfo.internalIntCount = 1;
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
- srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
}
else
{
// CopyBlk
- unsigned size = blkNode->gtBlkSize;
- GenTreePtr dstAddr = blkNode->Addr();
- GenTreePtr srcAddr = blkNode->Data();
- short internalIntCount = 0;
- regMaskTP internalIntCandidates = RBM_NONE;
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
#if 0
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
@@ -1379,11 +1398,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
- if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
{
- assert(!blockSize->IsIconHandle());
- ssize_t size = blockSize->gtIntCon.gtIconVal;
-
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
// our framework assemblies, so this is the main code generation scheme we'll use.
@@ -1404,9 +1420,9 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// If src or dst are on stack, we don't have to generate the address into a register
// because it's just some constant+SP
- if (srcAddr->OperIsLocalAddr())
+ if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr())
{
- MakeSrcContained(blkNode, srcAddr);
+ MakeSrcContained(blkNode, srcAddrOrFill);
}
if (dstAddr->OperIsLocalAddr())
@@ -1425,15 +1441,9 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
// The srcAddr goes in arg1.
- if (srcAddr != nullptr)
+ if (srcAddrOrFill != nullptr)
{
- srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
- }
- else
- {
- // This is a local; we'll use a temp register for its address.
- internalIntCandidates |= RBM_ARG_1;
- internalIntCount++;
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
}
if (size != 0)
{
@@ -1447,7 +1457,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
blkNode->gtLsraInfo.setSrcCount(3);
GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- assert(!blockSize->IsIconHandle());
blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
}
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
@@ -1860,7 +1869,7 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
}
}
-void Lowering::LowerCmp(GenTreePtr tree)
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
{
TreeNodeInfo* info = &(tree->gtLsraInfo);
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 6f98eb6661..589cef482e 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -77,7 +77,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// InitBlk
MakeSrcContained(storeLoc, op1);
}
- else if (storeLoc->TypeGet() == TYP_SIMD12)
+ else if ((storeLoc->TypeGet() == TYP_SIMD12) && (storeLoc->OperGet() == GT_STORE_LCL_FLD))
{
// Need an additional register to extract upper 4 bytes of Vector3.
info->internalFloatCount = 1;
@@ -177,6 +177,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_LCL_FLD:
+ case GT_LCL_VAR:
info->srcCount = 0;
info->dstCount = 1;
@@ -185,9 +186,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
if (tree->TypeGet() == TYP_SIMD12)
{
// We need an internal register different from targetReg in which 'tree' produces its result
- // because both targetReg and internal reg will be in use at the same time. This is achieved
- // by asking for two internal registers.
- info->internalFloatCount = 2;
+ // because both targetReg and internal reg will be in use at the same time.
+ info->internalFloatCount = 1;
+ info->isInternalRegDelayFree = true;
info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
}
#endif
@@ -195,7 +196,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- info->srcCount = 1;
+#ifdef _TARGET_X86_
+ if (tree->gtGetOp1()->OperGet() == GT_LONG)
+ {
+ info->srcCount = 2;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ info->srcCount = 1;
+ }
info->dstCount = 0;
LowerStoreLoc(tree->AsLclVarCommon());
break;
@@ -242,6 +252,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARGPLACE:
case GT_NO_OP:
case GT_START_NONGC:
@@ -319,9 +330,87 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_JTRUE:
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+
+ GenTree* cmp = tree->gtGetOp1();
+ l->clearDstCount(cmp);
+
+#ifdef FEATURE_SIMD
+ // Say we have the following IR
+ // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
+ // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
+ // GT_JTRUE(integerCompareResult)
+ //
+ // In this case we don't need to generate code for GT_EQ/GT_NE, since SIMD (In)Equality
+ // intrinsic would set or clear Zero flag.
+
+ genTreeOps cmpOper = cmp->OperGet();
+ if (cmpOper == GT_EQ || cmpOper == GT_NE)
+ {
+ GenTree* cmpOp1 = cmp->gtGetOp1();
+ GenTree* cmpOp2 = cmp->gtGetOp2();
+
+ if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
+ {
+ // clear dstCount on SIMD node to indicate that
+ // result doesn't need to be materialized into a register.
+ l->clearOperandCounts(cmp);
+ l->clearDstCount(cmpOp1);
+ l->clearOperandCounts(cmpOp2);
+
+ // Codegen of SIMD (in)Equality uses target integer reg
+ // only for setting flags. Target reg is not needed on AVX
+ // when comparing against Vector Zero. In all other cases
+ // we need to reserve an int type internal register, since we
+ // have cleared dstCount.
+ if (compiler->canUseAVX() && cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
+ {
+ // We don't need an internal register, since we use vptest
+ // for setting flags.
+ }
+ else
+ {
+ ++(cmpOp1->gtLsraInfo.internalIntCount);
+ regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
+ internalCandidates |= l->allRegs(TYP_INT);
+ cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
+ }
+
+ // We would have to reverse compare oper in the following cases:
+ // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
+ // Therefore, if compare oper is == or != against false(0), we will
+ // be checking opposite of what is required.
+ //
+ // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
+ // Therefore, if compare oper is == or != against true(1), we will
+ // be checking opposite of what is required.
+ GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
+ {
+ if (cmpOp2->IsIntegralConst(0))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
+ else
+ {
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
+ if (cmpOp2->IsIntegralConst(1))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+ break;
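The reversal rule can be sanity-checked with a small standalone simulation. The only assumptions (taken from the comment above) are that both SIMD (in)equality intrinsics leave ZF set exactly when the two vectors are equal, and that the emitted branch tests ZF (JE) for a final GT_EQ and !ZF (JNE) for a final GT_NE:

    #include <cassert>

    int main()
    {
        for (int vectorsEqual = 0; vectorsEqual <= 1; vectorsEqual++)
            for (int isOpEquality = 0; isOpEquality <= 1; isOpEquality++)
                for (int cmpIsEq = 0; cmpIsEq <= 1; cmpIsEq++)     // GT_EQ (1) vs GT_NE (0)
                    for (int cnst = 0; cnst <= 1; cnst++)          // compared against 0 or 1
                    {
                        bool zf        = (vectorsEqual != 0);      // flag produced by the SIMD compare
                        bool simdValue = (isOpEquality != 0) ? (vectorsEqual != 0) : (vectorsEqual == 0);
                        bool wanted    = (cmpIsEq != 0) ? (simdValue == (cnst != 0)) : (simdValue != (cnst != 0));

                        // Reversal rule: equality compared against 0, inequality compared against 1.
                        bool reverse   = (isOpEquality != 0) ? (cnst == 0) : (cnst == 1);
                        bool finalIsEq = reverse ? (cmpIsEq == 0) : (cmpIsEq != 0);
                        bool jumpTaken = finalIsEq ? zf : !zf;

                        assert(jumpTaken == wanted);
                    }
        return 0;
    }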
+
+ case GT_JCC:
info->srcCount = 0;
info->dstCount = 0;
- l->clearDstCount(tree->gtOp.gtOp1);
break;
case GT_JMP:
@@ -436,6 +525,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_MUL:
case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
SetMulOpCounts(tree);
break;
@@ -478,6 +570,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->internalFloatCount = 1;
info->setInternalCandidates(l, l->internalFloatRegCandidates());
}
+ else
+ {
+ // Codegen of this tree node sets ZF and SF flags.
+ tree->gtFlags |= GTF_ZSF_SET;
+ }
break;
case GT_NOT:
@@ -490,6 +587,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_RSZ:
case GT_ROL:
case GT_ROR:
+#ifdef _TARGET_X86_
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+#endif
TreeNodeInfoInitShiftRotate(tree);
break;
@@ -499,7 +600,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_LE:
case GT_GE:
case GT_GT:
- LowerCmp(tree);
+ TreeNodeInfoInitCmp(tree);
break;
case GT_CKFINITE:
@@ -542,10 +643,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
break;
-#ifdef _TARGET_X86_
- case GT_OBJ:
- NYI_X86("GT_OBJ");
-#elif !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if !defined(FEATURE_PUT_STRUCT_ARG_STK)
case GT_OBJ:
#endif
case GT_BLK:
@@ -556,11 +654,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->dstCount = 0;
break;
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree);
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
break;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
case GT_STORE_BLK:
case GT_STORE_OBJ:
@@ -568,6 +666,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitBlockStore(tree->AsBlk());
break;
+ case GT_INIT_VAL:
+ // Always a passthrough of its child's value.
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
case GT_LCLHEAP:
TreeNodeInfoInitLclHeap(tree);
break;
@@ -634,14 +738,20 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_ARR_OFFSET:
// This consumes the offset, if any, the arrObj and the effective index,
// and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
- info->internalIntCount = 1;
+ info->srcCount = 3;
+ info->dstCount = 1;
+
// we don't want to generate code for this
if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
{
MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
}
+ else
+ {
+ // Here we simply need an internal register, which must be different
+ // from any of the operand's registers, but may be the same as targetReg.
+ info->internalIntCount = 1;
+ }
break;
case GT_LEA:
@@ -725,15 +835,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
#endif
case GT_CLS_VAR:
- info->srcCount = 0;
- // GT_CLS_VAR, by the time we reach the backend, must always
- // be a pure use.
- // It will produce a result of the type of the
- // node, and use an internal register for the address.
-
- info->dstCount = 1;
- assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
- info->internalIntCount = 1;
+ // These nodes are eliminated by rationalizer.
+ JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet()));
+ unreached();
break;
} // end switch (tree->OperGet())
@@ -813,27 +917,36 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
}
+ TreeNodeInfoInitCheckByteable(tree);
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
+// required, and set the tree node info accordingly.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
+{
#ifdef _TARGET_X86_
+ LinearScan* l = m_lsra;
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
// Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
// if the tree node is a byte type.
//
- // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
- // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
- // value. In this case we need to exclude esi/edi from the src candidates of op2.
- //
- // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
- //
- // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size. Here is an example of generated code:
- // cmp dl, byte ptr[addr mode]
- // movzx edx, dl
- //
// Though this looks conservative in theory, in practice we could not think of a case where
// the logic below leads to a conservative register specification. If in future we find such
// a case, this logic will need to be fine-tuned accordingly.
- if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) ||
- (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2())))
+
+ if (ExcludeNonByteableRegisters(tree))
{
regMaskTP regMask;
if (info->dstCount > 0)
@@ -870,9 +983,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
}
#endif //_TARGET_X86_
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
}
//------------------------------------------------------------------------
@@ -1028,6 +1138,31 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
GenTreePtr shiftBy = tree->gtOp.gtOp2;
GenTreePtr source = tree->gtOp.gtOp1;
+#ifdef _TARGET_X86_
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert(source->OperGet() == GT_LONG);
+
+ info->srcCount++;
+
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ sourceLo->gtLsraInfo.isDelayFree = true;
+ }
+ else
+ {
+ GenTreePtr sourceHi = source->gtOp.gtOp2;
+ sourceHi->gtLsraInfo.isDelayFree = true;
+ }
+
+ source->gtLsraInfo.hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
+ }
+#endif
+
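The reason the GT_LONG's two halves are both live inputs (and hence the delay-free marking) is easiest to see from what GT_LSH_HI has to compute. A sketch for shift counts in [1, 31] (other counts take a different decomposition path, so that range is an assumption of this example), comparable to what x86 SHLD produces:

    #include <cassert>
    #include <cstdint>

    // High 32 bits of a 64-bit left shift, built from the two 32-bit halves.
    static uint32_t LshHi(uint32_t loPart, uint32_t hiPart, unsigned count)
    {
        // Bits shifted out of the low half are shifted into the high half,
        // so both halves are needed to produce the result.
        return (hiPart << count) | (loPart >> (32 - count));
    }

    int main()
    {
        const uint64_t value = 0x1122334455667788ull;
        for (unsigned count = 1; count < 32; count++)
        {
            uint32_t hi = LshHi(static_cast<uint32_t>(value), static_cast<uint32_t>(value >> 32), count);
            assert(hi == static_cast<uint32_t>((value << count) >> 32));
        }
        return 0;
    }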
// x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
// We will allow whatever can be encoded - hope you know what you are doing.
if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
@@ -1040,6 +1175,17 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
else
{
MakeSrcContained(tree, shiftBy);
+
+ // Note that Rotate Left/Right instructions don't set ZF and SF flags.
+ //
+ // If the operand being shifted is 32 bits wide, the hardware masks the shift count down to
+ // its lower five bits to get the actual shift count. Similarly, for 64-bit operands the
+ // shift count is narrowed to [0..63]. If the resulting shift count is zero, then the shift
+ // operation won't modify the flags.
+ //
+ // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
+ // if the shift count is known to be non-zero and in the range depending on the
+ // operand size.
}
}
@@ -1088,6 +1234,12 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
assert(ctrlExpr == nullptr);
assert(call->gtCallAddr != nullptr);
ctrlExpr = call->gtCallAddr;
+
+#ifdef _TARGET_X86_
+ // Fast tail calls aren't currently supported on x86, but if they ever are, the code
+ // below that handles indirect VSD calls will need to be fixed.
+ assert(!call->IsFastTailCall() || !call->IsVirtualStub());
+#endif // _TARGET_X86_
}
// set reg requirements on call target represented as control sequence.
@@ -1103,7 +1255,24 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// computed into a register.
if (!call->IsFastTailCall())
{
- if (ctrlExpr->isIndir())
+#ifdef _TARGET_X86_
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ assert(ctrlExpr->isIndir());
+
+ ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
+ MakeSrcContained(call, ctrlExpr);
+ }
+ else
+#endif // _TARGET_X86_
+ if (ctrlExpr->isIndir())
{
MakeSrcContained(call, ctrlExpr);
}
@@ -1191,7 +1360,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// First, count reg args
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
@@ -1206,7 +1375,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
argNode->gtLsraInfo.srcCount = 1;
argNode->gtLsraInfo.dstCount = 0;
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the node is TYP_STRUCT and it is put on stack with
// putarg_stk operation, we consume and produce no registers.
// In this case the embedded Obj node should not produce
@@ -1218,7 +1387,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
argNode->gtLsraInfo.srcCount = 0;
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
continue;
}
@@ -1248,7 +1417,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
// Use the curArgTabEntry's isStruct to get whether the param is a struct.
- if (varTypeIsStruct(argNode) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
{
unsigned originalSize = 0;
LclVarDsc* varDsc = nullptr;
@@ -1270,16 +1439,16 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
{
originalSize = genTypeSize(argNode->gtType);
}
- else if (argNode->gtOper == GT_LIST)
+ else if (argNode->gtOper == GT_FIELD_LIST)
{
originalSize = 0;
// There could be up to 2 PUTARG_REGs in the list
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- for (; argListPtr; argListPtr = argListPtr->Rest())
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+ unsigned iterationNum = 0;
+ for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
{
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ GenTreePtr putArgRegNode = fieldListPtr->Current();
assert(putArgRegNode->gtOper == GT_PUTARG_REG);
if (iterationNum == 0)
@@ -1509,7 +1678,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
}
m_lsra->clearOperandCounts(source);
}
- else if (!source->OperIsSIMD())
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
{
assert(source->IsLocal());
MakeSrcContained(blkNode, source);
@@ -1519,7 +1688,11 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (isInitBlk)
{
GenTree* initVal = source;
- srcAddrOrFill = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
// If we have an InitBlk with constant block size we can optimize several ways:
// a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes
// we use rep stosb since this reduces the register pressure in LSRA and we have
@@ -1571,8 +1744,23 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// a pack of 16 init value constants.
blkNode->gtLsraInfo.internalFloatCount = 1;
blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ if ((fill == 0) && ((size & 0xf) == 0))
+ {
+ MakeSrcContained(blkNode, source);
+ }
}
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+#ifdef _TARGET_X86_
+ if ((size & 1) != 0)
+ {
+ // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
+ // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
+ // when unrolling, so only allow byteable registers as the source value. (We could
+ // consider just using BlkOpKindRepInstr instead.)
+ sourceRegMask = RBM_BYTE_REGS;
+ }
+#endif // _TARGET_X86_
}
else
{
@@ -1825,7 +2013,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
}
}
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
//
@@ -1835,44 +2023,219 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
+ TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
LinearScan* l = m_lsra;
- if (tree->TypeGet() != TYP_STRUCT)
+#ifdef _TARGET_X86_
+ if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
+ {
+ putArgStk->gtNumberReferenceSlots = 0;
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Invalid;
+
+ GenTreeFieldList* fieldList = putArgStk->gtOp1->AsFieldList();
+
+ // The code generator will push these fields in reverse order by offset. Reorder the list here s.t. the order
+ // of uses is visible to LSRA.
+ unsigned fieldCount = 0;
+ GenTreeFieldList* head = nullptr;
+ for (GenTreeFieldList *current = fieldList, *next; current != nullptr; current = next)
+ {
+ next = current->Rest();
+
+ // First, insert the field node into the sorted list.
+ GenTreeFieldList* prev = nullptr;
+ for (GenTreeFieldList* cursor = head;; cursor = cursor->Rest())
+ {
+ // If the offset of the current list node is greater than the offset of the cursor or if we have
+ // reached the end of the list, insert the current node before the cursor and terminate.
+ if ((cursor == nullptr) || (current->gtFieldOffset > cursor->gtFieldOffset))
+ {
+ if (prev == nullptr)
+ {
+ assert(cursor == head);
+ head = current;
+ }
+ else
+ {
+ prev->Rest() = current;
+ }
+
+ current->Rest() = cursor;
+ break;
+ }
+ }
+
+ fieldCount++;
+ }
+
+ info->srcCount = fieldCount;
+ info->dstCount = 0;
+
+ // In theory, the upper bound for the size of a field list is 8: these constructs only appear when passing the
+ // collection of lclVars that represent the fields of a promoted struct lclVar, and we do not promote struct
+ // lclVars with more than 4 fields. If each of these lclVars is of type long, decomposition will split the
+ // corresponding field list nodes in two, giving an upper bound of 8.
+ //
+ // The reason that this is important is that the algorithm we use above to sort the field list is O(N^2): if
+ // the maximum size of a field list grows significantly, we will need to reevaluate it.
+ assert(fieldCount <= 8);
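The reordering above is an insertion sort that keeps the list in descending offset order. A standalone sketch of the same pattern, using a simplified, hypothetical node type rather than GenTreeFieldList:

    #include <cassert>

    struct FieldNode
    {
        unsigned   offset;
        FieldNode* rest;
    };

    // Insert each node of 'list' into a new list ordered by descending offset.
    static FieldNode* SortByDescendingOffset(FieldNode* list)
    {
        FieldNode* head = nullptr;
        for (FieldNode *current = list, *next; current != nullptr; current = next)
        {
            next = current->rest;

            FieldNode* prev = nullptr;
            for (FieldNode* cursor = head;; cursor = cursor->rest)
            {
                // Insert before the first node with a smaller-or-equal offset, or at the end.
                if ((cursor == nullptr) || (current->offset > cursor->offset))
                {
                    if (prev == nullptr)
                    {
                        head = current;
                    }
                    else
                    {
                        prev->rest = current;
                    }
                    current->rest = cursor;
                    break;
                }
                prev = cursor;
            }
        }
        return head;
    }

    int main()
    {
        FieldNode n3{4, nullptr}, n2{8, &n3}, n1{0, &n2}; // original order: offsets 0, 8, 4
        FieldNode* sorted = SortByDescendingOffset(&n1);
        assert(sorted->offset == 8 && sorted->rest->offset == 4 && sorted->rest->rest->offset == 0);
        return 0;
    }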
+
+ // The sort above may have changed which node is at the head of the list. Update the PUTARG_STK node if
+ // necessary.
+ if (head != fieldList)
+ {
+ head->gtFlags |= GTF_FIELD_LIST_HEAD;
+ fieldList->gtFlags &= ~GTF_FIELD_LIST_HEAD;
+
+#ifdef DEBUG
+ head->gtSeqNum = fieldList->gtSeqNum;
+#endif // DEBUG
+
+ head->gtLsraInfo = fieldList->gtLsraInfo;
+ head->gtClearReg(comp);
+
+ BlockRange().InsertAfter(fieldList, head);
+ BlockRange().Remove(fieldList);
+
+ fieldList = head;
+ putArgStk->gtOp1 = fieldList;
+ }
+
+ // Now that the fields have been sorted, initialize the LSRA info.
+ bool allFieldsAreSlots = true;
+ bool needsByteTemp = false;
+ unsigned prevOffset = putArgStk->getArgSize();
+ for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const var_types fieldType = fieldNode->TypeGet();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ assert(fieldType != TYP_LONG);
+
+ // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+ // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+ // registers to be consumed atomically by the call.
+ if (varTypeIsIntegralOrI(fieldNode))
+ {
+ if (fieldNode->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+ if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
+ {
+ SetRegOptional(fieldNode);
+ }
+ else
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ }
+ else if (fieldNode->IsIntCnsFitsInI32())
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ else
+ {
+ // For the case where we cannot directly push the value, if we run out of registers,
+ // it would be better to defer computation until we are pushing the arguments rather
+ // than spilling, but this situation is not all that common, as most cases of promoted
+ // structs do not have a large number of fields, and of those most are lclVars or
+ // copy-propagated constants.
+ SetRegOptional(fieldNode);
+ }
+ }
+ else
+ {
+ assert(varTypeIsFloating(fieldNode));
+ }
+
+ // We can treat as a slot any field that is stored at a slot boundary, where the previous
+ // field is not in the same slot. (Note that we store the fields in reverse order.)
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+ if (!fieldIsSlot)
+ {
+ allFieldsAreSlots = false;
+ if (varTypeIsByte(fieldType))
+ {
+ // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
+ // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
+ // need a byte-addressable register for the store. We will enforce this requirement on an internal
+ // register, which we can use to copy multiple byte values.
+ needsByteTemp = true;
+ }
+ }
+
+ if (varTypeIsGC(fieldType))
+ {
+ putArgStk->gtNumberReferenceSlots++;
+ }
+
+ prevOffset = fieldOffset;
+ }
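As a worked example of the fieldIsSlot test (the field layout here is hypothetical): for an 8-byte argument with an int at offset 4, a short at offset 2 and two bytes at offsets 1 and 0, only the int qualifies as a slot, so allFieldsAreSlots ends up false and the Push kind is chosen below.

    #include <cassert>

    int main()
    {
        const unsigned argSize    = 8;
        const unsigned offsets[]  = {4, 2, 1, 0};                 // visited in descending order
        const bool     expected[] = {true, false, false, false};

        unsigned prevOffset = argSize;
        for (int i = 0; i < 4; i++)
        {
            const unsigned fieldOffset = offsets[i];
            const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
            assert(fieldIsSlot == expected[i]);
            prevOffset = fieldOffset;
        }
        return 0;
    }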
+
+ // Set the copy kind.
+ // TODO-X86-CQ: Even if we are using push, if there are contiguous floating point fields, we should
+ // adjust the stack once for those fields. The latter is really best done in code generation, but
+ // this tuning should probably be undertaken as a whole.
+ // Also, if there are floating point fields, it may be better to use the "Unroll" mode
+ // of copying the struct as a whole, if the fields are not register candidates.
+ if (allFieldsAreSlots)
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots;
+ }
+ else
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
+ // If any of the fields cannot be stored with an actual push, we may need a temporary
+ // register to load the value before storing it to the stack location.
+ info->internalIntCount = 1;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+ if (needsByteTemp)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+ info->setInternalCandidates(l, regMask);
+ }
+ return;
+ }
+#endif // _TARGET_X86_
+
+#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+ // For PutArgStk of a TYP_SIMD12, we need an extra register.
+ if (putArgStk->TypeGet() == TYP_SIMD12)
{
- TreeNodeInfoInitSimple(tree);
+ info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
+ info->dstCount = 0;
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->allSIMDRegs());
return;
}
+#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
- GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk();
+ if (putArgStk->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(putArgStk);
+ return;
+ }
- GenTreePtr dst = tree;
- GenTreePtr src = tree->gtOp.gtOp1;
+ GenTreePtr dst = putArgStk;
+ GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
+ bool haveLocalAddr = false;
if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
{
srcAddr = src->gtOp.gtOp1;
+ assert(srcAddr != nullptr);
+ haveLocalAddr = srcAddr->OperIsLocalAddr();
}
else
{
- assert(varTypeIsSIMD(tree));
- }
- info->srcCount = src->gtLsraInfo.dstCount;
-
- // If this is a stack variable address,
- // make the op1 contained, so this way
- // there is no unnecessary copying between registers.
- // To avoid assertion, increment the parent's source.
- // It is recovered below.
- bool haveLocalAddr = ((srcAddr != nullptr) && (srcAddr->OperIsLocalAddr()));
- if (haveLocalAddr)
- {
- info->srcCount += 1;
+ assert(varTypeIsSIMD(putArgStk));
}
+ info->srcCount = src->gtLsraInfo.dstCount;
info->dstCount = 0;
// In case of a CpBlk we could use a helper call. In case of putarg_stk we
@@ -1884,7 +2247,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
// This threshold will decide from using the helper or let the JIT decide to inline
// a code sequence of its choice.
ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
- ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE;
+ ssize_t size = putArgStk->gtNumSlots * TARGET_POINTER_SIZE;
// TODO-X86-CQ: The helper call either is not supported on x86 or required more work
// (I don't know which).
@@ -1892,7 +2255,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
// If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
// Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
// our framework assemblies, so this is the main code generation scheme we'll use.
- if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0)
+ if (size <= CPBLK_UNROLL_LIMIT && putArgStk->gtNumberReferenceSlots == 0)
{
// If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
//
@@ -1913,46 +2276,62 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
info->setInternalCandidates(l, regMask);
}
+#ifdef _TARGET_X86_
+ if (size >= 8)
+#else // !_TARGET_X86_
if (size >= XMM_REGSIZE_BYTES)
+#endif // !_TARGET_X86_
{
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
+ // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
+ // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
// series of 16-byte loads and stores.
info->internalFloatCount = 1;
info->addInternalCandidates(l, l->internalFloatRegCandidates());
}
- if (haveLocalAddr)
+#ifdef _TARGET_X86_
+ if (size < XMM_REGSIZE_BYTES)
{
- MakeSrcContained(putArgStkTree, srcAddr);
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
}
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll;
+ else
+#endif // _TARGET_X86_
+ {
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll;
+ }
+ }
+#ifdef _TARGET_X86_
+ else if (putArgStk->gtNumberReferenceSlots != 0)
+ {
+ // On x86, we must use `push` to store GC references to the stack in order for the emitter to properly update
+ // the function's GC info. These `putargstk` nodes will generate a sequence of `push` instructions.
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
}
+#endif // _TARGET_X86_
else
{
info->internalIntCount += 3;
info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
- if (haveLocalAddr)
- {
- MakeSrcContained(putArgStkTree, srcAddr);
- }
- putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr;
+ putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
}
// Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStkTree, src);
+ MakeSrcContained(putArgStk, src);
- // Balance up the inc above.
if (haveLocalAddr)
{
- info->srcCount -= 1;
+ // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
+ // copies.
+ //
+ // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it
+ // afterwards.
+ info->srcCount++;
+ MakeSrcContained(putArgStk, srcAddr);
+ info->srcCount--;
}
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
@@ -1976,13 +2355,17 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
// Here '-' means don't care.
//
// Size? Init Memory? # temp regs
- // 0 - 0
- // const and <=6 reg words - 0
- // const and >6 reg words Yes 0
+ // 0 - 0 (returns 0)
+ // const and <=6 reg words - 0 (pushes '0')
+ // const and >6 reg words Yes 0 (pushes '0')
// const and <PageSize No 0 (amd64) 1 (x86)
- // const and >=PageSize No 2
- // Non-const Yes 0
- // Non-const No 2
+ // (x86: tmpReg for subtracting from esp)
+ // const and >=PageSize No 2 (regCnt and tmpReg for subtracting from sp)
+ // Non-const Yes 0 (regCnt=targetReg and pushes '0')
+ // Non-const No 2 (regCnt and tmpReg for subtracting from sp)
+ //
+ // Note: Here we don't need the internal register to be different from targetReg.
+ // Rather, we require it to be different from the operand's reg.
GenTreePtr size = tree->gtOp.gtOp1;
if (size->IsCnsIntOrI())
@@ -2121,6 +2504,9 @@ void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
// as reg optional.
SetRegOptionalForBinOp(tree);
}
+
+ // Codegen of this tree node sets ZF and SF flags.
+ tree->gtFlags |= GTF_ZSF_SET;
}
//------------------------------------------------------------------------
@@ -2189,15 +2575,40 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
info->setDstCandidates(l, RBM_RAX);
}
- // If possible would like to have op1 in RAX to avoid a register move
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ bool op2CanBeRegOptional = true;
+#ifdef _TARGET_X86_
+ if (op1->OperGet() == GT_LONG)
+ {
+ // To avoid a reg move we would like to have op1's low part in RAX and its high part in RDX.
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+
+ // Src count is actually 3, so increment.
+ assert(op2->IsCnsIntOrI());
+ assert(tree->OperGet() == GT_UMOD);
+ info->srcCount++;
+ op2CanBeRegOptional = false;
+
+ // This situation also requires an internal register.
+ info->internalIntCount = 1;
+ info->setInternalCandidates(l, l->allRegs(TYP_INT));
+
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
+ }
+ else
+#endif
+ {
+ // If possible would like to have op1 in RAX to avoid a register move
+ op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
// divisor can be an r/m, but the memory indirection must be of the same size as the divide
if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
{
MakeSrcContained(tree, op2);
}
- else
+ else if (op2CanBeRegOptional)
{
op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
@@ -2298,12 +2709,13 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->dstCount = 1;
switch (simdTree->gtSIMDIntrinsicID)
{
+ GenTree* op1;
GenTree* op2;
case SIMDIntrinsicInit:
{
info->srcCount = 1;
- GenTree* op1 = tree->gtOp.gtOp1;
+ op1 = tree->gtOp.gtOp1;
// This sets all fields of a SIMD struct to the given value.
// Mark op1 as contained if it is either zero or int constant of all 1's,
@@ -2377,7 +2789,8 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->srcCount = 2;
// SSE2 32-bit integer multiplication requires two temp regs
- if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
+ comp->getSIMDInstructionSet() == InstructionSet_SSE2)
{
info->internalFloatCount = 2;
info->setInternalCandidates(lsra, lsra->allSIMDRegs());
@@ -2406,38 +2819,78 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
- // Need two SIMD registers as scratch.
- // See genSIMDIntrinsicRelOp() for details on code sequence generate and
- // the need for two scratch registers.
- info->srcCount = 2;
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->srcCount = 2;
+
+ // On SSE4/AVX, we can generate optimal code for (in)equality
+ // against zero using ptest. We can safely do this optimization
+ // for integral vectors but not for floating-point vectors, because
+ // they have both +0.0 and -0.0, and +0.0 == -0.0.
+ op2 = tree->gtGetOp2();
+ if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+
+ // Need one SIMD register as scratch.
+ // See genSIMDIntrinsicRelOp() for details on code sequence generated and
+ // the need for one scratch register.
+ //
+ // Note these intrinsics produce a BOOL result, hence internal float
+ // registers reserved are guaranteed to be different from target
+ // integer register without explicitly specifying.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
break;
case SIMDIntrinsicDotProduct:
- if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
- (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ // Float/Double vectors:
+ // For SSE, or AVX with 32-byte vectors, we also need an internal register
+ // as scratch. Further we need the targetReg and internal reg to be distinct
+ // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we
+ // don't need a tmpReg.
+ //
+ // 32-byte integer vector on SSE4/AVX:
+ // will take advantage of phaddd, which operates only on 128-bit xmm reg.
+ // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal
+ // registers since targetReg is an int type register.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated
+ // and the need for scratch registers.
+ if (varTypeIsFloating(simdTree->gtSIMDBaseType))
{
- // For SSE, or AVX with 32-byte vectors, we also need an internal register as scratch.
- // Further we need the targetReg and internal reg to be distinct registers.
- // This is achieved by requesting two internal registers; thus one of them
- // will be different from targetReg.
- // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
- //
- // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
- // the need for scratch registers.
- info->internalFloatCount = 2;
+ if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ {
+ info->internalFloatCount = 1;
+ info->isInternalRegDelayFree = true;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ // else don't need scratch reg(s).
+ }
+ else
+ {
+ assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+
+ // No need to set isInternalRegDelayFree since targetReg is
+ // an int type reg and guaranteed to be different from xmm/ymm
+ // regs.
+ info->internalFloatCount = comp->canUseAVX() ? 2 : 1;
info->setInternalCandidates(lsra, lsra->allSIMDRegs());
}
info->srcCount = 2;
break;
case SIMDIntrinsicGetItem:
+ {
// This implements get_Item method. The sources are:
// - the source SIMD struct
// - index (which element to get)
// The result is baseType of SIMD struct.
info->srcCount = 2;
+ op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
// If the index is a constant, mark it as contained.
@@ -2446,48 +2899,69 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
info->srcCount = 1;
}
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
- // can use that in the process of extracting the element.
- //
- // If the index is a constant and base type is a small int we can use pextrw, but on AVX
- // we will need a temp if are indexing into the upper half of the AVX register.
- // In all other cases with constant index, we need a temp xmm register to extract the
- // element if index is other than zero.
-
- if (!op2->IsCnsIntOrI())
+ if (op1->isMemoryOp())
{
- (void)comp->getSIMDInitTempVarNum();
+ MakeSrcContained(tree, op1);
+
+ // Although GT_IND of TYP_SIMD12 reserves an internal float
+ // register for reading 4 and 8 bytes from memory and
+ // assembling them into target XMM reg, it is not required
+ // in this case.
+ op1->gtLsraInfo.internalIntCount = 0;
+ op1->gtLsraInfo.internalFloatCount = 0;
}
- else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
+ else
{
- bool needFloatTemp;
- if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
- (comp->getSIMDInstructionSet() == InstructionSet_AVX))
- {
- int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
- needFloatTemp = (byteShiftCnt >= 16);
- }
- else
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ //
+ // If the index is a constant and base type is a small int we can use pextrw, but on AVX
+ // we will need a temp if we are indexing into the upper half of the AVX register.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
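+ //
+ // As a worked example (illustrative values, not taken from this change): for a
+ // constant index into a vector with a 2-byte base type on AVX, byteShiftCnt =
+ // index * 2, so index 5 gives 10 (< 16, lower half, no temp needed), while
+ // index 9 gives 18 (>= 16, upper half, a temp xmm register is required).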
+
+ if (!op2->IsCnsIntOrI())
{
- needFloatTemp = !op2->IsIntegralConst(0);
+ (void)comp->getSIMDInitTempVarNum();
}
- if (needFloatTemp)
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
{
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ bool needFloatTemp;
+ if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
+ (comp->getSIMDInstructionSet() == InstructionSet_AVX))
+ {
+ int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
+ needFloatTemp = (byteShiftCnt >= 16);
+ }
+ else
+ {
+ needFloatTemp = !op2->IsIntegralConst(0);
+ }
+
+ if (needFloatTemp)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
}
}
- break;
+ }
+ break;
case SIMDIntrinsicSetX:
case SIMDIntrinsicSetY:
case SIMDIntrinsicSetZ:
case SIMDIntrinsicSetW:
- // We need an internal integer register
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ info->srcCount = 2;
+
+ // We need an internal integer register for SSE2 codegen
+ if (comp->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ info->internalIntCount = 1;
+ info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ }
+
break;
case SIMDIntrinsicCast:
@@ -2592,6 +3066,8 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree)
{
if (genTypeSize(castOpType) == 8)
{
+ // Here we don't need internal register to be different from targetReg,
+ // rather require it to be different from operand's reg.
info->internalIntCount = 1;
}
}
@@ -2693,7 +3169,6 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
GenTreePtr index = nullptr;
unsigned mul, cns;
bool rev;
- bool modifiedSources = false;
#ifdef FEATURE_SIMD
// If indirTree is of TYP_SIMD12, don't mark addr as contained
@@ -2711,11 +3186,10 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
info->internalFloatCount = 1;
// In case of GT_IND we need an internal register different from targetReg and
- // both of the registers are used at the same time. This achieved by reserving
- // two internal registers
+ // both of the registers are used at the same time.
if (indirTree->OperGet() == GT_IND)
{
- (info->internalFloatCount)++;
+ info->isInternalRegDelayFree = true;
}
info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
@@ -2724,16 +3198,21 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
}
#endif // FEATURE_SIMD
- // These nodes go into an addr mode:
- // - GT_CLS_VAR_ADDR turns into a constant.
- // - GT_LCL_VAR_ADDR is a stack addr mode.
- if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
+ if ((indirTree->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0)
{
+ // This indirection requires its address in a register, so skip any
+ // further processing that might otherwise make the address contained.
+ }
+ else if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
+ {
+ // These nodes go into an addr mode:
+ // - GT_CLS_VAR_ADDR turns into a constant.
+ // - GT_LCL_VAR_ADDR is a stack addr mode.
+
// make this contained, it turns into a constant that goes into an addr mode
MakeSrcContained(indirTree, addr);
}
- else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp) &&
- addr->gtLsraInfo.getDstCandidates(m_lsra) != RBM_VIRTUAL_STUB_PARAM)
+ else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))
{
// Amd64:
// We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
@@ -2755,17 +3234,10 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
}
else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
{
- GenTreeAddrMode* lea = addr->AsAddrMode();
- base = lea->Base();
- index = lea->Index();
-
- m_lsra->clearOperandCounts(addr);
- // The srcCount is decremented because addr is now "contained",
- // then we account for the base and index below, if they are non-null.
- info->srcCount--;
+ MakeSrcContained(indirTree, addr);
}
else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ !AreSourcesPossiblyModifiedLocals(indirTree, base, index))
{
// An addressing mode will be constructed that may cause some
// nodes to not need a register, and cause others' lifetimes to be extended
@@ -2774,7 +3246,16 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
assert(base != addr);
m_lsra->clearOperandCounts(addr);
- GenTreePtr arrLength = nullptr;
+ const bool hasBase = base != nullptr;
+ const bool hasIndex = index != nullptr;
+ assert(hasBase || hasIndex); // At least one of a base or an index must be present.
+
+ // If the addressing mode has both a base and an index, bump its source count by one. If it only has one or the
+ // other, its source count is already correct (due to the source for the address itself).
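+ // For example (illustrative only): an address such as [rbx + rcx*4 + 8] has both
+ // a base (rbx) and an index (rcx) and so consumes two source registers, while
+ // [rbx + 8] or [rcx*4 + 8] consumes just one.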
+ if (hasBase && hasIndex)
+ {
+ info->srcCount++;
+ }
// Traverse the computation below GT_IND to find the operands
// for the addressing mode, marking the various constants and
@@ -2784,14 +3265,13 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
// up of simple arithmetic operators, and the code generator
// only traverses one leg of each node.
- bool foundBase = (base == nullptr);
- bool foundIndex = (index == nullptr);
- GenTreePtr nextChild = nullptr;
- for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ bool foundBase = !hasBase;
+ bool foundIndex = !hasIndex;
+ for (GenTree *child = addr, *nextChild = nullptr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
{
- nextChild = nullptr;
- GenTreePtr op1 = child->gtOp.gtOp1;
- GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+ nextChild = nullptr;
+ GenTree* op1 = child->gtOp.gtOp1;
+ GenTree* op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
if (op1 == base)
{
@@ -2832,7 +3312,6 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
}
}
assert(foundBase && foundIndex);
- info->srcCount--; // it gets incremented below.
}
else if (addr->gtOper == GT_ARR_ELEM)
{
@@ -2845,32 +3324,23 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
assert(addr->gtLsraInfo.srcCount >= 2);
addr->gtLsraInfo.srcCount -= 1;
}
- else
- {
- // it is nothing but a plain indir
- info->srcCount--; // base gets added in below
- base = addr;
- }
-
- if (base != nullptr)
- {
- info->srcCount++;
- }
-
- if (index != nullptr && !modifiedSources)
- {
- info->srcCount++;
- }
}
-void Lowering::LowerCmp(GenTreePtr tree)
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
{
+ assert(tree->OperIsCompare());
+
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 2;
info->dstCount = 1;
#ifdef _TARGET_X86_
+ // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
+ // to store the result into the low byte of a register, which requires the dst be a byteable register.
+ // We always set the dst candidates, though, because if this compare is consumed by a jump, they
+ // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
+ // that flag is maintained until this location (especially for decomposed long compares).
info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
#endif // _TARGET_X86_
@@ -2894,9 +3364,9 @@ void Lowering::LowerCmp(GenTreePtr tree)
#endif // !defined(_TARGET_64BIT_)
// If either of op1 or op2 is floating point values, then we need to use
- // ucomiss or ucomisd to compare, both of which support the following form
- // ucomis[s|d] xmm, xmm/mem. That is only the second operand can be a memory
- // op.
+ // ucomiss or ucomisd to compare, both of which support the following form:
+ // ucomis[s|d] xmm, xmm/mem
+ // That is, only the second operand can be a memory op.
//
// Second operand is a memory Op: Note that depending on comparison operator,
// the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
@@ -2952,16 +3422,9 @@ void Lowering::LowerCmp(GenTreePtr tree)
bool hasShortCast = false;
if (CheckImmedAndMakeContained(tree, op2))
{
- bool op1CanBeContained = (op1Type == op2Type);
- if (!op1CanBeContained)
- {
- if (genTypeSize(op1Type) == genTypeSize(op2Type))
- {
- // The constant is of the correct size, but we don't have an exact type match
- // We can treat the isMemoryOp as "contained"
- op1CanBeContained = true;
- }
- }
+ // If the types are the same, or if the constant is of the correct size,
+ // we can treat the isMemoryOp as contained.
+ bool op1CanBeContained = (genTypeSize(op1Type) == genTypeSize(op2Type));
// Do we have a short compare against a constant in op2
//
@@ -3031,13 +3494,13 @@ void Lowering::LowerCmp(GenTreePtr tree)
bool op1IsMadeContained = false;
// When op1 is a GT_AND we can often generate a single "test" instruction
- // instead of two instructions (an "and" instruction followed by a "cmp"/"test")
+ // instead of two instructions (an "and" instruction followed by a "cmp"/"test").
//
- // This instruction can only be used for equality or inequality comparions.
+ // This instruction can only be used for equality or inequality comparisons.
// and we must have a compare against zero.
//
// If we have a positive test for a single bit we can reverse the condition and
- // make the compare be against zero
+ // make the compare be against zero.
//
// Example:
// GT_EQ GT_NE
@@ -3046,8 +3509,8 @@ void Lowering::LowerCmp(GenTreePtr tree)
// / \ / \
// andOp1 GT_CNS (0x100) andOp1 GT_CNS (0x100)
//
- // We will mark the GT_AND node as contained if the tree is a equality compare with zero
- // Additionally when we do this we also allow for a contained memory operand for "andOp1".
+ // We will mark the GT_AND node as contained if the tree is an equality compare with zero.
+ // Additionally, when we do this we also allow for a contained memory operand for "andOp1".
//
bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
@@ -3066,7 +3529,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
// so that we can generate a test instruction.
// Reverse the equality comparison
- tree->gtOper = (tree->gtOper == GT_EQ) ? GT_NE : GT_EQ;
+ tree->SetOperRaw((tree->gtOper == GT_EQ) ? GT_NE : GT_EQ);
// Change the relOp2CnsVal to zero
relOp2CnsVal = 0;
@@ -3171,7 +3634,7 @@ void Lowering::LowerCmp(GenTreePtr tree)
genTreeOps castOp1Oper = castOp1->OperGet();
bool safeOper = false;
- // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE
+ // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE.
// For example when 'castOp1Oper' is a GT_RSZ or GT_RSH then we are shifting
// bits from the left into the lower bits. If we change the type to a TYP_UBYTE
// we will instead generate a byte sized shift operation: shr al, 24
@@ -3196,22 +3659,24 @@ void Lowering::LowerCmp(GenTreePtr tree)
//
assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
- GenTreePtr removeTreeNode = op1;
- tree->gtOp.gtOp1 = castOp1;
- op1 = castOp1;
- castOp1->gtType = TYP_UBYTE;
-
- // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
- if (castOp1Oper == GT_CNS_INT)
- {
- castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
- }
-
+ // TODO-Cleanup: we're within "if (CheckImmedAndMakeContained(tree, op2))", so isn't
+ // the following condition always true?
if (op2->isContainedIntOrIImmed())
{
ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue();
if (val >= 0 && val <= 255)
{
+ GenTreePtr removeTreeNode = op1;
+ tree->gtOp.gtOp1 = castOp1;
+ op1 = castOp1;
+ castOp1->gtType = TYP_UBYTE;
+
+ // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
+ if (castOp1Oper == GT_CNS_INT)
+ {
+ castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
+ }
+
op2->gtType = TYP_UBYTE;
tree->gtFlags |= GTF_UNSIGNED;
@@ -3222,18 +3687,26 @@ void Lowering::LowerCmp(GenTreePtr tree)
MakeSrcContained(tree, op1);
op1IsMadeContained = true;
}
- }
- }
- BlockRange().Remove(removeTreeNode);
+ BlockRange().Remove(removeTreeNode);
+
+ // We've changed the type on op1 to TYP_UBYTE, but we already processed that node.
+ // We need to go back and mark it byteable.
+ // TODO-Cleanup: it might be better to move this out of the TreeNodeInfoInit pass to
+ // the earlier "lower" pass, in which case the byteable check would just fall out.
+ // But that is quite complex!
+ TreeNodeInfoInitCheckByteable(op1);
+
#ifdef DEBUG
- if (comp->verbose)
- {
- printf("LowerCmp: Removing a GT_CAST to TYP_UBYTE and changing castOp1->gtType to "
- "TYP_UBYTE\n");
- comp->gtDispTreeRange(BlockRange(), tree);
- }
+ if (comp->verbose)
+ {
+ printf("TreeNodeInfoInitCmp: Removing a GT_CAST to TYP_UBYTE and changing "
+ "castOp1->gtType to TYP_UBYTE\n");
+ comp->gtDispTreeRange(BlockRange(), tree);
+ }
#endif
+ }
+ }
}
}
@@ -3241,6 +3714,41 @@ void Lowering::LowerCmp(GenTreePtr tree)
if (!op1IsMadeContained)
{
SetRegOptional(op1);
+
+ // If op1 codegen sets ZF and SF flags and ==/!= against
+ // zero, we don't need to generate test instruction,
+ // provided we don't have another GenTree node between op1
+ // and tree that could potentially modify flags.
+ //
+ // TODO-CQ: right now the below peep is inexpensive and
+ // gets the benefit in most of cases because in majority
+ // of cases op1, op2 and tree would be in that order in
+ // execution. In general we should be able to check that all
+ // the nodes that come after op1 in execution order do not
+ // modify the flags so that it is safe to avoid generating a
+ // test instruction. Such a check requires that on each
+ // GenTree node we need to set the info whether its codegen
+ // will modify flags.
+ //
+ // TODO-CQ: We can optimize compare against zero in the
+ // following cases by generating the branch as indicated
+ // against each case.
+ // 1) unsigned compare
+ // < 0 - always FALSE
+ // <= 0 - ZF=1 and jne
+ // > 0 - ZF=0 and je
+ // >= 0 - always TRUE
+ //
+ // 2) signed compare
+ // < 0 - SF=1 and js
+ // >= 0 - SF=0 and jns
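+ //
+ // As a small illustration of the ZF/SF peep above (a sketch, not code generated
+ // by this change): for "(x & y) != 0" the sequence
+ // and eax, edx
+ // test eax, eax
+ // jne L1
+ // can drop the "test", since "and" already sets ZF for the compare against zero.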
+ if (isEqualityCompare && op1->gtSetZSFlags() && op2->IsIntegralConst(0) && (op1->gtNext == op2) &&
+ (op2->gtNext == tree))
+ {
+ // Require codegen of op1 to set the flags.
+ assert(!op1->gtSetFlags());
+ op1->gtFlags |= GTF_SET_FLAGS;
+ }
}
}
}
@@ -3255,10 +3763,17 @@ void Lowering::LowerCmp(GenTreePtr tree)
{
MakeSrcContained(tree, op1);
}
+ else if (op1->IsCnsIntOrI())
+ {
+ // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm,
+ // but there is currently an assert in CodeGen::genCompareInt().
+ // https://github.com/dotnet/coreclr/issues/7270
+ SetRegOptional(op2);
+ }
else
{
// One of op1 or op2 could be marked as reg optional
- // to indicate that codgen can still generate code
+ // to indicate that codegen can still generate code
// if one of them is on stack.
SetRegOptional(PreferredRegOptionalOperand(tree));
}
@@ -3318,7 +3833,6 @@ void Lowering::LowerCast(GenTree* tree)
var_types dstType = tree->CastToType();
var_types srcType = op1->TypeGet();
var_types tmpType = TYP_UNDEF;
- bool srcUns = false;
// force the srcType to unsigned if GT_UNSIGNED flag is set
if (tree->gtFlags & GTF_UNSIGNED)
@@ -3849,6 +4363,20 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
}
m_lsra->clearOperandCounts(indirCandidateChild);
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(storeInd))
+ {
+ // If storeInd is of TYP_BYTE, restrict indirOpSource's source candidates to byteable registers.
+ bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
+ if (!containedNode)
+ {
+ regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
+ assert(regMask != RBM_NONE);
+ indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
+ }
+ }
+#endif
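+
+ // Background note on byteable registers (x86 detail, not introduced by this
+ // change): in 32-bit mode only eax, ebx, ecx and edx have byte-addressable
+ // forms (al/bl/cl/dl), so a byte-sized store such as "mov byte ptr [mem], cl"
+ // is encodable while one sourced from esi or edi is not; that is why the
+ // source candidates above exclude RBM_NON_BYTE_REGS.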
+
return true;
}
@@ -3858,8 +4386,11 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
*/
void Lowering::SetMulOpCounts(GenTreePtr tree)
{
+#if defined(_TARGET_X86_)
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG);
+#else
assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
-
+#endif
TreeNodeInfo* info = &(tree->gtLsraInfo);
info->srcCount = 2;
@@ -3900,13 +4431,18 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
GenTreeIntConCommon* imm = nullptr;
GenTreePtr other = nullptr;
- // There are three forms of x86 multiply:
- // one-op form: RDX:RAX = RAX * r/m
- // two-op form: reg *= r/m
- // three-op form: reg = r/m * imm
+// There are three forms of x86 multiply:
+// one-op form: RDX:RAX = RAX * r/m
+// two-op form: reg *= r/m
+// three-op form: reg = r/m * imm
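+//
+// Illustrative instruction shapes for these forms (x86 background, offered as a
+// sketch rather than taken from this change):
+// one-op: mul qword ptr [mem] ; RDX:RAX = RAX * [mem]
+// two-op: imul eax, ecx ; eax = eax * ecx
+// three-op: imul eax, ecx, 9 ; eax = ecx * 9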
- // This special widening 32x32->64 MUL is not used on x64
- assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+// This special widening 32x32->64 MUL is not used on x64
+#if defined(_TARGET_X86_)
+ if (tree->OperGet() != GT_MUL_LONG)
+#endif
+ {
+ assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+ }
// Multiply should never be using small types
assert(!varTypeIsSmall(tree->TypeGet()));
@@ -3924,12 +4460,21 @@ void Lowering::SetMulOpCounts(GenTreePtr tree)
info->setDstCandidates(m_lsra, RBM_RAX);
hasImpliedFirstOperand = true;
}
- else if (tree->gtOper == GT_MULHI)
+ else if (tree->OperGet() == GT_MULHI)
+ {
+ // Have to use the encoding RDX:RAX = RAX * rm. Since we only care about the
+ // upper 32 bits of the result, set the destination candidate to RBM_RDX.
+ info->setDstCandidates(m_lsra, RBM_RDX);
+ hasImpliedFirstOperand = true;
+ }
+#if defined(_TARGET_X86_)
+ else if (tree->OperGet() == GT_MUL_LONG)
{
// have to use the encoding:RDX:RAX = RAX * rm
info->setDstCandidates(m_lsra, RBM_RAX);
hasImpliedFirstOperand = true;
}
+#endif
else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
{
if (IsContainableImmed(tree, op2))
@@ -4187,6 +4732,71 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
return preferredOp;
}
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for
+// various reasons
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// True if we need to exclude non-byteable registers for this node; false otherwise.
+//
+bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
+{
+ // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
+ // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
+ // value. In this case we need to exclude esi/edi from the src candidates of op2.
+ if (varTypeIsByte(tree))
+ {
+ return true;
+ }
+ // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
+ else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
+ {
+ return true;
+ }
+ else if (tree->OperIsCompare())
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ if (varTypeIsByte(op1) && varTypeIsByte(op2))
+ {
+ return true;
+ }
+ // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
+ {
+ return true;
+ }
+ // Example5: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif // _TARGET_X86_
+
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 317b976e42..accfd6ee78 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -355,6 +355,33 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
}
#ifdef DEBUG
+
+//----------------------------------------------------------------------------
+// getConstrainedRegMask: Returns new regMask which is the intersection of
+// regMaskActual and regMaskConstraint if the new regMask has at least
+// minRegCount registers, otherwise returns regMaskActual.
+//
+// Arguments:
+// regMaskActual - regMask that needs to be constrained
+// regMaskConstraint - regMask constraint that needs to be
+// applied to regMaskActual
+// minRegCount - Minimum number of regs that should be
+// present in the new regMask.
+//
+// Return Value:
+// New regMask that has at least minRegCount registers after intersection.
+// Otherwise returns regMaskActual.
+regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount)
+{
+ regMaskTP newMask = regMaskActual & regMaskConstraint;
+ if (genCountBits(newMask) >= minRegCount)
+ {
+ return newMask;
+ }
+
+ return regMaskActual;
+}
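+
+// As an illustration of the fallback behavior above (example values, not from the
+// change itself): on x86, constraining regMaskActual = {eax, ecx, edx, ebx} by
+// RBM_CALLEE_SAVED leaves only {ebx}; with minRegCount = 2 that is too few, so
+// regMaskActual is returned unchanged, while with minRegCount = 1 the constrained
+// mask {ebx} is returned.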
+
//------------------------------------------------------------------------
// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
// them based on the current stress options.
@@ -373,38 +400,46 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
{
if (getStressLimitRegs() != LSRA_LIMIT_NONE)
{
+ // The refPosition could be null, for example when called
+ // by getTempRegForResolution().
+ int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1;
+
switch (getStressLimitRegs())
{
case LSRA_LIMIT_CALLEE:
- if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE)
+ if (!compiler->opts.compDbgEnC)
{
- mask &= RBM_CALLEE_SAVED;
+ mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount);
}
break;
+
case LSRA_LIMIT_CALLER:
- if ((mask & RBM_CALLEE_TRASH) != RBM_NONE)
- {
- mask &= RBM_CALLEE_TRASH;
- }
- break;
+ {
+ mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount);
+ }
+ break;
+
case LSRA_LIMIT_SMALL_SET:
if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
{
- mask &= LsraLimitSmallIntSet;
+ mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount);
}
else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
{
- mask &= LsraLimitSmallFPSet;
+ mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount);
}
break;
+
default:
unreached();
}
+
if (refPosition != nullptr && refPosition->isFixedRegRef)
{
mask |= refPosition->registerAssignment;
}
}
+
return mask;
}
#endif // DEBUG
@@ -658,16 +693,13 @@ void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
#endif // _TARGET_AMD64_
Interval* theInterval = rp->getInterval();
+
#ifdef DEBUG
regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp));
if (doReverseCallerCallee())
{
- regMaskTP newAssignment = rp->registerAssignment;
- newAssignment &= calleeSaveMask;
- if (newAssignment != RBM_NONE)
- {
- rp->registerAssignment = newAssignment;
- }
+ rp->registerAssignment =
+ getConstrainedRegMask(rp->registerAssignment, calleeSaveMask, rp->minRegCandidateCount);
}
else
#endif // DEBUG
@@ -777,6 +809,9 @@ RefPosition* LinearScan::newRefPosition(
// mask - Set of valid registers for this RefPosition
// multiRegIdx - register position if this RefPosition corresponds to a
// multi-reg call node.
+// minRegCandidateCount - Minimum number of registers that needs to be ensured while
+// constraining candidates for this ref position under
+// LSRA stress. This is a DEBUG only arg.
//
// Return Value:
// a new RefPosition
@@ -786,7 +821,8 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
RefType theRefType,
GenTree* theTreeNode,
regMaskTP mask,
- unsigned multiRegIdx /* = 0 */)
+ unsigned multiRegIdx /* = 0 */
+ DEBUGARG(unsigned minRegCandidateCount /* = 1 */))
{
#ifdef DEBUG
if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
@@ -843,6 +879,10 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval,
newRP->setMultiRegIdx(multiRegIdx);
newRP->setAllocateIfProfitable(0);
+#ifdef DEBUG
+ newRP->minRegCandidateCount = minRegCandidateCount;
+#endif // DEBUG
+
associateRefPosWithInterval(newRP);
DBEXEC(VERBOSE, newRP->dump());
@@ -1071,12 +1111,14 @@ LinearScan::LinearScan(Compiler* theCompiler)
#endif
dumpTerse = (JitConfig.JitDumpTerseLsra() != 0);
-
#endif // DEBUG
+
availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
+
#if ETW_EBP_FRAMED
availableIntRegs &= ~RBM_FPBASE;
#endif // ETW_EBP_FRAMED
+
availableFloatRegs = RBM_ALLFLOAT;
availableDoubleRegs = RBM_ALLDOUBLE;
@@ -1272,6 +1314,7 @@ void LinearScan::setBlockSequence()
bool addedInternalBlocks = false;
verifiedAllBBs = false;
+ hasCriticalEdges = false;
BasicBlock* nextBlock;
for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
{
@@ -1288,6 +1331,13 @@ void LinearScan::setBlockSequence()
blockInfo[block->bbNum].hasCriticalOutEdge = false;
blockInfo[block->bbNum].weight = block->bbWeight;
+#if TRACK_LSRA_STATS
+ blockInfo[block->bbNum].spillCount = 0;
+ blockInfo[block->bbNum].copyRegCount = 0;
+ blockInfo[block->bbNum].resolutionMovCount = 0;
+ blockInfo[block->bbNum].splitEdgeCount = 0;
+#endif // TRACK_LSRA_STATS
+
if (block->GetUniquePred(compiler) == nullptr)
{
for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
@@ -1296,6 +1346,7 @@ void LinearScan::setBlockSequence()
if (predBlock->NumSucc(compiler) > 1)
{
blockInfo[block->bbNum].hasCriticalInEdge = true;
+ hasCriticalEdges = true;
break;
}
else if (predBlock->bbJumpKind == BBJ_SWITCH)
@@ -1321,6 +1372,7 @@ void LinearScan::setBlockSequence()
if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
{
blockInfo[block->bbNum].hasCriticalOutEdge = true;
+ hasCriticalEdges = true;
// We can stop checking now.
checkForCriticalOutEdge = false;
}
@@ -1666,11 +1718,6 @@ void LinearScan::doLinearScan()
compiler->codeGen->regSet.rsClearRegsModified();
- // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this
- // before building the intervals and ref positions, because those objects will embed
- // RBP in various register masks (like preferences) if RBP is allowed to be allocated.
- setFrameType();
-
initMaxSpill();
buildIntervals();
DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
@@ -1685,6 +1732,17 @@ void LinearScan::doLinearScan()
resolveRegisters();
compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);
+#if TRACK_LSRA_STATS
+ if ((JitConfig.DisplayLsraStats() != 0)
+#ifdef DEBUG
+ || VERBOSE
+#endif
+ )
+ {
+ dumpLsraStats(jitstdout);
+ }
+#endif // TRACK_LSRA_STATS
+
DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
compiler->compLSRADone = true;
@@ -1892,6 +1950,8 @@ void LinearScan::identifyCandidates()
// for vectors on Arm64, though the actual value may differ.
VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));
VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
unsigned int floatVarCount = 0;
unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
@@ -1902,6 +1962,37 @@ void LinearScan::identifyCandidates()
unsigned int largeVectorVarCount = 0;
unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if DOUBLE_ALIGN
+ unsigned refCntStk = 0;
+ unsigned refCntReg = 0;
+ unsigned refCntWtdReg = 0;
+ unsigned refCntStkParam = 0; // sum of ref counts for all stack based parameters
+ unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
+ doDoubleAlign = false;
+ bool checkDoubleAlign = true;
+ if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
+ {
+ checkDoubleAlign = false;
+ }
+ else
+ {
+ switch (compiler->getCanDoubleAlign())
+ {
+ case MUST_DOUBLE_ALIGN:
+ doDoubleAlign = true;
+ checkDoubleAlign = false;
+ break;
+ case CAN_DOUBLE_ALIGN:
+ break;
+ case CANT_DOUBLE_ALIGN:
+ doDoubleAlign = false;
+ checkDoubleAlign = false;
+ break;
+ default:
+ unreached();
+ }
+ }
+#endif // DOUBLE_ALIGN
for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
{
@@ -1911,6 +2002,32 @@ void LinearScan::identifyCandidates()
Interval* newInt = newInterval(intervalType);
newInt->setLocalNumber(lclNum, this);
+
+#if DOUBLE_ALIGN
+ if (checkDoubleAlign)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+ else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister)
+ {
+ refCntStk += varDsc->lvRefCnt;
+ if ((varDsc->lvType == TYP_DOUBLE) ||
+ ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign &&
+ (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT))))
+ {
+ refCntWtdStkDbl += varDsc->lvRefCntWtd;
+ }
+ }
+ else
+ {
+ refCntReg += varDsc->lvRefCnt;
+ refCntWtdReg += varDsc->lvRefCntWtd;
+ }
+ }
+#endif // DOUBLE_ALIGN
+
if (varDsc->lvIsStructField)
{
newInt->isStructField = true;
@@ -2095,6 +2212,24 @@ void LinearScan::identifyCandidates()
}
}
+#if DOUBLE_ALIGN
+ if (checkDoubleAlign)
+ {
+ // TODO-CQ: Fine-tune this:
+ // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars
+ // allocated to the frame pointer, which is probably the wrong order.
+ // However, because it runs after allocation, it can determine the impact of demoting
+ // the lclVars allocated to the frame pointer.
+ // => Here, the estimate of the EBP refCnt and weighted refCnt is a wild guess.
+ //
+ unsigned refCntEBP = refCntReg / 8;
+ unsigned refCntWtdEBP = refCntWtdReg / 8;
+
+ doDoubleAlign =
+ compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
+ }
+#endif // DOUBLE_ALIGN
+
// The factors we consider to determine which set of fp vars to use as candidates for callee save
// registers current include the number of fp vars, whether there are loops, and whether there are
// multiple exits. These have been selected somewhat empirically, but there is probably room for
@@ -2510,6 +2645,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
break;
case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
killMask = RBM_RAX | RBM_RDX;
break;
@@ -2644,7 +2782,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
}
break;
-#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_)
+#if defined(PROFILING_SUPPORTED)
// If this method requires profiler ELT hook then mark these nodes as killing
// callee trash registers (excluding RAX and XMM0). The reason for this is that
// profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
@@ -2660,10 +2798,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
if (compiler->compIsProfilerHookNeeded())
{
killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
- ;
}
break;
-#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
+#endif // PROFILING_SUPPORTED
default:
// for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
@@ -2769,19 +2906,46 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
return false;
}
+//----------------------------------------------------------------------------
+// defineNewInternalTemp: Defines a ref position for an internal temp.
+//
+// Arguments:
+// tree - Gentree node requiring an internal register
+// regType - Register type
+// currentLoc - Location of the temp Def position
+// regMask - register mask of candidates for temp
+// minRegCandidateCount - Minimum number of registers to be ensured in the candidate
+// set under LSRA stress mode. This is a
+// DEBUG only arg.
RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
RegisterType regType,
LsraLocation currentLoc,
- regMaskTP regMask)
+ regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount))
{
Interval* current = newInterval(regType);
current->isInternal = true;
- return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask);
+ return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0 DEBUG_ARG(minRegCandidateCount));
}
+//------------------------------------------------------------------------
+// buildInternalRegisterDefsForNode - build Def positions for internal
+// registers required for tree node.
+//
+// Arguments:
+// tree - Gentree node that needs internal registers
+// currentLoc - Location at which Def positions need to be defined
+// temps - in-out array which is populated with ref positions
+// created for Def of internal registers
+// minRegCandidateCount - Minimum number of registers to be ensured in the candidate
+// set of ref positions under LSRA stress. This is
+// a DEBUG only arg.
+//
+// Returns:
+// The total number of Def positions created for internal registers of tree node.
int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
LsraLocation currentLoc,
- RefPosition* temps[]) // populates
+ RefPosition* temps[] // populates
+ DEBUGARG(unsigned minRegCandidateCount))
{
int count;
int internalIntCount = tree->gtLsraInfo.internalIntCount;
@@ -2805,14 +2969,16 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
internalIntCands = genFindLowestBit(internalIntCands);
internalCands &= ~internalIntCands;
}
- temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands);
+ temps[count] =
+ defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands DEBUG_ARG(minRegCandidateCount));
}
int internalFloatCount = tree->gtLsraInfo.internalFloatCount;
for (int i = 0; i < internalFloatCount; i++)
{
regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
- temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands);
+ temps[count++] =
+ defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands DEBUG_ARG(minRegCandidateCount));
}
noway_assert(count < MaxInternalRegisters);
@@ -2820,10 +2986,26 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
return count;
}
+//------------------------------------------------------------------------
+// buildInternalRegisterUsesForNode - adds Use positions for internal
+// registers required for tree node.
+//
+// Arguments:
+// tree - Gentree node that needs internal registers
+// currentLoc - Location at which Use positions need to be defined
+// defs - int array containing Def positions of internal
+// registers.
+// total - Total number of Def positions in 'defs' array.
+// minRegCandidateCount - Minimum number of registers to be ensured in the candidate
+// set of ref positions under LSRA stress. This is
+// a DEBUG only arg.
+//
+// Returns:
+// Void.
void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
LsraLocation currentLoc,
RefPosition* defs[],
- int total)
+ int total DEBUGARG(unsigned minRegCandidateCount))
{
assert(total < MaxInternalRegisters);
@@ -2840,8 +3022,14 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
}
else
{
- RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask);
- newest->lastUse = true;
+ RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask,
+ 0 DEBUG_ARG(minRegCandidateCount));
+ newest->lastUse = true;
+
+ if (tree->gtLsraInfo.isInternalRegDelayFree)
+ {
+ newest->delayRegFree = true;
+ }
}
}
}
@@ -3196,10 +3384,10 @@ static int ComputeOperandDstCount(GenTree* operand)
// If an operand has no destination registers but does have source registers, it must be a store
// or a compare.
assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
- operand->OperIsCompare());
+ operand->OperIsCompare() || operand->IsSIMDEqualityOrInequality());
return 0;
}
- else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+ else if (!operand->OperIsFieldListHead() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
{
// Stores and void-typed operands may be encountered when processing call nodes, which contain
// pointers to argument setup stores.
@@ -3207,7 +3395,7 @@ static int ComputeOperandDstCount(GenTree* operand)
}
else
{
- // If an aggregate or non-void-typed operand is not an unsued value and does not have source registers,
+ // If a field list or non-void-typed operand is not an unused value and does not have source registers,
// that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
int dstCount = 0;
for (GenTree* op : operand->Operands())
@@ -3254,16 +3442,14 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
assert(!isRegPairType(tree->TypeGet()));
#endif // _TARGET_ARM_
- // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes
+ // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes.
+ // GT_CLS_VAR nodes should have been eliminated by rationalizer.
assert(tree->OperGet() != GT_ARGPLACE);
- assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate());
+ assert(tree->OperGet() != GT_LIST);
+ assert(tree->OperGet() != GT_CLS_VAR);
- // These nodes are eliminated by the Rationalizer.
- if (tree->OperGet() == GT_CLS_VAR)
- {
- JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet()));
- assert(!"Unexpected node in LSRA.");
- }
+ // The LIR traversal visits only the first node in a GT_FIELD_LIST.
+ assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead());
// The set of internal temporary registers used by this node are stored in the
// gtRsvdRegs register mask. Clear it out.
@@ -3409,7 +3595,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
{
// Get the location info for the register defined by the first operand.
LocationInfoList operandDefs;
- bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
+ bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
assert(found);
// Since we only expect to consume one register, we should only have a single register to
@@ -3503,7 +3689,51 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
// (i.e. the target is read-modify-write), preference the dst to op1.
bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc;
- if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
+
+#if defined(DEBUG) && defined(_TARGET_X86_)
+ // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode
+ // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the
+ // first two integral arguments to a call. This can leave us with too few registers to successfully allocate in
+ // situations like the following:
+ //
+ // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d>
+ //
+ // /--* t1026 ref
+ // t1352 = * putarg_reg ref REG NA
+ //
+ // t342 = lclVar int V14 loc6 u:4 REG NA $50c
+ //
+ // t343 = const int 1 REG NA $41
+ //
+ // /--* t342 int
+ // +--* t343 int
+ // t344 = * + int REG NA $495
+ //
+ // t345 = lclVar int V04 arg4 u:2 REG NA $100
+ //
+ // /--* t344 int
+ // +--* t345 int
+ // t346 = * % int REG NA $496
+ //
+ // /--* t346 int
+ // t1353 = * putarg_reg int REG NA
+ //
+ // t1354 = lclVar ref V52 tmp35 (last use) REG NA
+ //
+ // /--* t1354 ref
+ // t1355 = * lea(b+0) byref REG NA
+ //
+ // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the
+ // set of allocatable registers, leaving only `eax` and `edx`. The allocator will then fail to allocate a register
+ // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to
+ // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be
+ // available due to the special put arg.
+ const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER;
+#else
+ const bool supportsSpecialPutArg = true;
+#endif
+
+ if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
(tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
{
// This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
@@ -3525,9 +3755,17 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
RefPosition* internalRefs[MaxInternalRegisters];
+#ifdef DEBUG
+ // Number of registers required for tree node is the sum of
+ // consume + produce + internalCount. This is the minimum
+ // set of registers that needs to be ensured in candidate
+ // set of ref positions created.
+ unsigned minRegCount = consume + produce + info.internalIntCount + info.internalFloatCount;
+#endif // DEBUG
+
// make intervals for all the 'internal' register requirements for this node
// where internal means additional registers required temporarily
- int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs);
+ int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs DEBUG_ARG(minRegCount));
// pop all ref'd tree temps
GenTreeOperandIterator iterator = tree->OperandsBegin();
@@ -3632,6 +3870,37 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
candidates = fixedAssignment;
}
+#ifdef DEBUG
+ // If delayRegFree, then Use will interfere with the destination of
+ // the consuming node. Therefore, we also need to add the kill set of
+ // consuming node to minRegCount.
+ //
+ // For example consider the following IR on x86, where v01 and v02
+ // are method args coming in ecx and edx respectively.
+ // GT_DIV(v01, v02)
+ //
+ // For GT_DIV, minRegCount will be 3 without adding the kill set
+ // of the GT_DIV node.
+ //
+ // Assume further JitStressRegs=2, which would constrain
+ // candidates to callee trashable regs { eax, ecx, edx } on
+ // use positions of v01 and v02. LSRA allocates ecx for v01.
+ // Use position of v02 cannot be allocated a regs since it
+ // is marked delay-reg free and {eax,edx} are getting killed
+ // before the def of GT_DIV. For this reason, minRegCount
+ // for Use position of v02 also needs to take into account
+ // of kill set of its consuming node.
+ unsigned minRegCountForUsePos = minRegCount;
+ if (delayRegFree)
+ {
+ regMaskTP killMask = getKillSetForNode(tree);
+ if (killMask != RBM_NONE)
+ {
+ minRegCountForUsePos += genCountBits(killMask);
+ }
+ }
+#endif // DEBUG
+
RefPosition* pos;
if ((candidates & allRegs(i->registerType)) == 0)
{
@@ -3645,13 +3914,16 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
regNumber physicalReg = genRegNumFromMask(fixedAssignment);
RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment);
}
- pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx);
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType),
+ multiRegIdx DEBUG_ARG(minRegCountForUsePos));
pos->registerAssignment = candidates;
}
else
{
- pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx);
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates,
+ multiRegIdx DEBUG_ARG(minRegCountForUsePos));
}
+
if (delayRegFree)
{
hasDelayFreeSrc = true;
@@ -3675,7 +3947,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
listNodePool.ReturnNodes(operandDefs);
}
- buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount);
+ buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount DEBUG_ARG(minRegCount));
RegisterType registerType = getDefType(tree);
regMaskTP candidates = getDefCandidates(tree);
@@ -3708,7 +3980,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
{
// Build RefPositions for saving any live large vectors.
// This must be done after the kills, so that we know which large vectors are still live.
- VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1));
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -3779,7 +4051,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i));
}
- RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
+ RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates,
+ (unsigned)i DEBUG_ARG(minRegCount));
if (info.isLocalDefUse)
{
pos->isLocalDefUse = true;
@@ -3791,11 +4064,12 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
}
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
+ // SaveDef position must be at the same location as Def position of call node.
+ buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- bool isContainedNode =
- !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore()));
+ bool isContainedNode = !noAdd && consume == 0 && produce == 0 &&
+ (tree->OperIsFieldListHead() || ((tree->TypeGet() != TYP_VOID) && !tree->OperIsStore()));
if (isContainedNode)
{
// Contained nodes map to the concatenated lists of their operands.
@@ -3852,6 +4126,22 @@ BasicBlock* getNonEmptyBlock(BasicBlock* block)
return block;
}
+//------------------------------------------------------------------------
+// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block
+//
+// Notes:
+// For each lclVar that is live-in to the first block:
+// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created.
+// - Otherwise, it will be marked as spilled, since it will not be assigned a register
+// on entry and will be loaded from memory on the undefined path.
+// Note that, when the compInitMem option is not set, we may encounter these on
+// paths that are protected by the same condition as an earlier def. However, since
+// we don't do the analysis to determine this - and couldn't rely on always identifying
+// such cases even if we tried - we must conservatively treat the undefined path as
+// being possible. This is a relatively rare case, so the introduced conservatism is
+// not expected to warrant the analysis required to determine the best placement of
+// an initialization.
+//
void LinearScan::insertZeroInitRefPositions()
{
// insert defs for this, then a block boundary
@@ -3861,15 +4151,23 @@ void LinearScan::insertZeroInitRefPositions()
{
unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
LclVarDsc* varDsc = compiler->lvaTable + varNum;
- if (!varDsc->lvIsParam && isCandidateVar(varDsc) &&
- (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())))
+ if (!varDsc->lvIsParam && isCandidateVar(varDsc))
{
- GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
- JITDUMP("V%02u was live in\n", varNum);
- Interval* interval = getIntervalForLocalVar(varNum);
- RefPosition* pos =
- newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
- varDsc->lvMustInit = true;
+ JITDUMP("V%02u was live in to first block:", varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
+ {
+ JITDUMP(" creating ZeroInit\n");
+ GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
+ varDsc->lvMustInit = true;
+ }
+ else
+ {
+ setIntervalAsSpilled(interval);
+ JITDUMP(" marking as spilled\n");
+ }
}
}
}
@@ -4131,8 +4429,20 @@ void LinearScan::buildIntervals()
}
#endif // DEBUG
+#if DOUBLE_ALIGN
+ // We will determine whether we should double align the frame during
+ // identifyCandidates(), but we initially assume that we will not.
+ doDoubleAlign = false;
+#endif
+
identifyCandidates();
+ // Figure out if we're going to use a frame pointer. We need to do this before building
+ // the ref positions, because those objects will embed the frame register in various register masks
+ // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will
+ // remove the frame pointer from the masks.
+ setFrameType();
+
DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
// second part:
@@ -4263,6 +4573,9 @@ void LinearScan::buildIntervals()
insertZeroInitRefPositions();
}
+ // Any lclVars live-in to a block are resolution candidates.
+ VarSetOps::UnionD(compiler, resolutionCandidateVars, block->bbLiveIn);
+
// Determine if we need any DummyDefs.
// We need DummyDefs for cases where "predBlock" isn't really a predecessor.
// Note that it's possible to have uses of uninitialized variables, in which case even the first
@@ -4274,8 +4587,8 @@ void LinearScan::buildIntervals()
VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn);
if (predBlock)
{
- JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum,
- predBlock->bbNum);
+ JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n",
+ predBlock->bbNum, block->bbNum);
assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
// Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut
@@ -4534,7 +4847,16 @@ void LinearScan::validateIntervals()
void LinearScan::setFrameType()
{
FrameType frameType = FT_NOT_SET;
- if (compiler->codeGen->isFramePointerRequired())
+#if DOUBLE_ALIGN
+ compiler->codeGen->setDoubleAlign(false);
+ if (doDoubleAlign)
+ {
+ frameType = FT_DOUBLE_ALIGN_FRAME;
+ compiler->codeGen->setDoubleAlign(true);
+ }
+ else
+#endif // DOUBLE_ALIGN
+ if (compiler->codeGen->isFramePointerRequired())
{
frameType = FT_EBP_FRAME;
}
@@ -4563,22 +4885,6 @@ void LinearScan::setFrameType()
}
}
-#if DOUBLE_ALIGN
- // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the
- // frame if needed. Note that this feature isn't on for amd64, because the stack is
- // always double-aligned by default.
- compiler->codeGen->setDoubleAlign(false);
-
- // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to
- // determine whether to double-align). Note, though that there is at least one test
- // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set
- // in certain situations.
- if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed)
- {
- frameType = FT_DOUBLE_ALIGN_FRAME;
- }
-#endif // DOUBLE_ALIGN
-
switch (frameType)
{
case FT_ESP_FRAME:
@@ -4593,7 +4899,6 @@ void LinearScan::setFrameType()
case FT_DOUBLE_ALIGN_FRAME:
noway_assert(!compiler->codeGen->isFramePointerRequired());
compiler->codeGen->setFramePointerUsed(false);
- compiler->codeGen->setDoubleAlign(true);
break;
#endif // DOUBLE_ALIGN
default:
@@ -4625,11 +4930,11 @@ void LinearScan::setFrameType()
compiler->rpFrameType = frameType;
}
-// Is the copyReg given by this RefPosition still busy at the
+// Is the copyReg/moveReg given by this RefPosition still busy at the
// given location?
-bool copyRegInUse(RefPosition* ref, LsraLocation loc)
+bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc)
{
- assert(ref->copyReg);
+ assert(ref->copyReg || ref->moveReg);
if (ref->getRefEndLocation() >= loc)
{
return true;
@@ -4689,14 +4994,15 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord,
return false;
}
- // Is this a copyReg? It is if the register assignment doesn't match.
- // (the recentReference may not be a copyReg, because we could have seen another
- // reference since the copyReg)
+ // Is this a copyReg/moveReg? It is if the register assignment doesn't match.
+ // (the recentReference may not be a copyReg/moveReg, because we could have seen another
+ // reference since the copyReg/moveReg)
if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
{
// Don't reassign it if it's still in use
- if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc))
+ if ((recentReference->copyReg || recentReference->moveReg) &&
+ copyOrMoveRegInUse(recentReference, currentLoc))
{
return false;
}
@@ -5393,8 +5699,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
// to remain live until the use, we should set the candidates to allRegs(regType)
// to avoid a spill - codegen can then insert the copy.
assert(candidates == candidateBit);
- physRegNextLocation = MaxLocation;
- farthestRefPosWeight = BB_MAX_WEIGHT;
+
+ // If a refPosition has a fixed reg as its candidate and is also marked
+ // as allocateIfProfitable, we should allocate fixed reg only if the
+ // weight of this ref position is greater than the weight of the ref
+ // position to which fixed reg is assigned. Such a case would arise
+ // on x86 under LSRA stress.
+ if (!allocateIfProfitable)
+ {
+ physRegNextLocation = MaxLocation;
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
}
else
{
@@ -5487,13 +5802,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
}
}
- LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
// We should never spill a register that's occupied by an Interval with its next use at the current location.
// Normally this won't occur (unless we actually had more uses in a single node than there are registers),
// because we'll always find something with a later nextLocation, but it can happen in stress when
// we have LSRA_SELECT_NEAREST.
- if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister())
{
continue;
}
@@ -5578,7 +5894,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
else
{
// Must have found a spill candidate.
- assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ assert(farthestRefPhysRegRecord != nullptr);
+ if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval;
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ assert(!nextRefPosition->RequiresRegister());
+ }
+ else
+ {
+ assert(farthestLocation > refLocation || refPosition->isFixedRegRef);
+ }
}
#endif
@@ -5699,6 +6025,70 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
}
//------------------------------------------------------------------------
+// setIntervalAsSplit: Set this Interval as being split
+//
+// Arguments:
+// interval - The Interval which is being split
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The given Interval will be marked as split, and it will be added to the
+// set of splitOrSpilledVars.
+//
+// Assumptions:
+// "interval" must be a lclVar interval, as tree temps are never split.
+// This is asserted in the call to getVarIndex().
+//
+void LinearScan::setIntervalAsSplit(Interval* interval)
+{
+ if (interval->isLocalVar)
+ {
+ unsigned varIndex = interval->getVarIndex(compiler);
+ if (!interval->isSplit)
+ {
+ VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
+ }
+ else
+ {
+ assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
+ }
+ }
+ interval->isSplit = true;
+}
+
+//------------------------------------------------------------------------
+// setIntervalAsSpilled: Set this Interval as being spilled
+//
+// Arguments:
+// interval - The Interval which is being spilled
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The given Interval will be marked as spilled, and it will be added
+// to the set of splitOrSpilledVars.
+//
+void LinearScan::setIntervalAsSpilled(Interval* interval)
+{
+ if (interval->isLocalVar)
+ {
+ unsigned varIndex = interval->getVarIndex(compiler);
+ if (!interval->isSpilled)
+ {
+ VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex);
+ }
+ else
+ {
+ assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex));
+ }
+ }
+ interval->isSpilled = true;
+}
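
As an aside, a minimal self-contained sketch of the membership bookkeeping that setIntervalAsSplit/setIntervalAsSpilled perform on splitOrSpilledVars, using a plain std::vector<bool> instead of the JIT's VarSetOps machinery (the tracker type and all of its names are invented for illustration):

// Hypothetical illustration only: the real code uses VarSetOps over compiler-tracked variable indices.
#include <cassert>
#include <cstdio>
#include <vector>

struct SplitSpillTracker
{
    std::vector<bool> members; // one bit per tracked variable index

    explicit SplitSpillTracker(size_t trackedCount) : members(trackedCount, false) {}

    // Mirrors the idempotent add: the first call adds the var, later calls just re-assert membership.
    void markSpilledOrSplit(unsigned varIndex, bool alreadyMarked)
    {
        if (!alreadyMarked)
        {
            members[varIndex] = true;
        }
        else
        {
            assert(members[varIndex]);
        }
    }
};

int main()
{
    SplitSpillTracker tracker(8);
    tracker.markSpilledOrSplit(3, /* alreadyMarked */ false);
    tracker.markSpilledOrSplit(3, /* alreadyMarked */ true); // a second spill of the same interval
    std::printf("var 3 tracked: %d\n", (int)tracker.members[3]);
    return 0;
}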
+
+//------------------------------------------------------------------------
// spill: Spill this Interval between "fromRefPosition" and "toRefPosition"
//
// Arguments:
@@ -5739,8 +6129,10 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition,
}
#endif // DEBUG
- interval->isActive = false;
- interval->isSpilled = true;
+ INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum));
+
+ interval->isActive = false;
+ setIntervalAsSpilled(interval);
// If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
// on entry to this block.
@@ -5923,7 +6315,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio
setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
if (spillRefPosition->nextRefPosition != nullptr)
{
- assignedInterval->isSpilled = true;
+ setIntervalAsSpilled(assignedInterval);
}
}
else
@@ -5945,7 +6337,8 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio
{
assignedInterval->assignedReg = regRec;
}
- else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec &&
+ else if (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval &&
+ regRec->previousInterval->assignedReg == regRec &&
regRec->previousInterval->getNextRefPosition() != nullptr)
{
regRec->assignedInterval = regRec->previousInterval;
@@ -6128,7 +6521,14 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc
if (allocationPass)
{
targetReg = predVarToRegMap[varIndex];
- INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)));
+#ifdef DEBUG
+ regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs));
+ if (newTargetReg != targetReg)
+ {
+ targetReg = newTargetReg;
+ setIntervalAsSplit(interval);
+ }
+#endif // DEBUG
inVarToRegMap[varIndex] = targetReg;
}
else // !allocationPass (i.e. resolution/write-back pass)
@@ -6686,6 +7086,7 @@ void LinearScan::allocateRegisters()
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
didDump = true;
allocate = false;
+ setIntervalAsSpilled(currentInterval);
}
// If it has no actual references, mark it as "lastUse"; since they're not actually part
// of any flow they won't have been marked during dataflow. Otherwise, if we allocate a
@@ -6912,6 +7313,7 @@ void LinearScan::allocateRegisters()
}
currentRefPosition->moveReg = true;
assignedRegister = REG_NA;
+ setIntervalAsSplit(currentInterval);
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
}
else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
@@ -6936,65 +7338,47 @@ void LinearScan::allocateRegisters()
}
else
{
- // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use.
-
- assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef ||
- currentInterval->hasConflictingDefUse));
+ assert(currentInterval != nullptr);
// It's already in a register, but not one we need.
- // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
- // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
- // The reason we need special handling for the "delayRegFree" case is that we need to mark the
- // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
- // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
- // if it is a last use.
- if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree)
- {
- if (!RefTypeIsDef(currentRefPosition->refType))
+ if (!RefTypeIsDef(currentRefPosition->refType))
+ {
+ regNumber copyReg = assignCopyReg(currentRefPosition);
+ assert(copyReg != REG_NA);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
+ lastAllocatedRefPosition = currentRefPosition;
+ if (currentRefPosition->lastUse)
{
- regNumber copyReg = assignCopyReg(currentRefPosition);
- assert(copyReg != REG_NA);
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
- lastAllocatedRefPosition = currentRefPosition;
- if (currentRefPosition->lastUse)
+ if (currentRefPosition->delayRegFree)
{
- if (currentRefPosition->delayRegFree)
- {
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
- assignedRegister));
- delayRegsToFree |=
- (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
- }
- else
- {
- INDEBUG(
- dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
- regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
- }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
+ assignedRegister));
+ delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
}
- // If this is a tree temp (non-localVar) interval, we will need an explicit move.
- if (!currentInterval->isLocalVar)
+ else
{
- currentRefPosition->moveReg = true;
- currentRefPosition->copyReg = false;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
+ regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
}
- continue;
}
- else
+ // If this is a tree temp (non-localVar) interval, we will need an explicit move.
+ if (!currentInterval->isLocalVar)
{
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
- regsToFree |= genRegMask(assignedRegister);
- // We want a new register, but we don't want this to be considered a spill.
- assignedRegister = REG_NA;
- if (physRegRecord->assignedInterval == currentInterval)
- {
- unassignPhysRegNoSpill(physRegRecord);
- }
+ currentRefPosition->moveReg = true;
+ currentRefPosition->copyReg = false;
}
+ continue;
}
else
{
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister));
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
+ regsToFree |= genRegMask(assignedRegister);
+ // We want a new register, but we don't want this to be considered a spill.
+ assignedRegister = REG_NA;
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
}
}
}
@@ -7031,23 +7415,39 @@ void LinearScan::allocateRegisters()
// then find a register to spill
if (assignedRegister == REG_NA)
{
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (refType == RefTypeUpperVectorSaveDef)
{
// TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
- currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH);
- assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+
+ // SaveDef position occurs after the Use of args and at the same location as Kill/Def
+ // positions of a call node. But SaveDef position cannot use any of the arg regs as
+ // they are needed for the call node.
+ currentRefPosition->registerAssignment =
+ (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS);
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+
// There MUST be caller-save registers available, because they have all just been killed.
+ // Amd64 Windows: xmm4-xmm5 are guaranteed to be available as xmm0-xmm3 are used for passing args.
+ // Amd64 Unix: xmm8-xmm15 are guaranteed to be available as xmm0-xmm7 are used for passing args.
+ // X86 RyuJIT Windows: xmm4-xmm7 are guaranteed to be available.
assert(assignedRegister != REG_NA);
+
// Now, spill it.
- // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill).
+ // Note:
+ // i) The reason we have to spill is that SaveDef position is allocated after the Kill positions
+ // of the call node are processed. Since callee-trash registers are killed by the call node,
+ // we explicitly spill and unassign the register.
+ // ii) These will look a bit backward in the dump, but it's a pain to dump the alloc before the
+ // spill.
unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
+
// Now set assignedRegister to REG_NA again so that we don't re-activate it.
assignedRegister = REG_NA;
}
else
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable())
{
if (allocateReg)
@@ -7069,6 +7469,7 @@ void LinearScan::allocateRegisters()
currentRefPosition->registerAssignment = RBM_NONE;
currentRefPosition->reload = false;
+ setIntervalAsSpilled(currentInterval);
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
}
@@ -7078,6 +7479,7 @@ void LinearScan::allocateRegisters()
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
currentRefPosition->registerAssignment = RBM_NONE;
currentInterval->isActive = false;
+ setIntervalAsSpilled(currentInterval);
}
}
#ifdef DEBUG
@@ -7224,7 +7626,7 @@ void LinearScan::allocateRegisters()
// - interval->physReg is set to the assigned register
// (i.e. at the code location which is currently being handled by resolveRegisters())
// - interval->isActive is true iff the interval is live and occupying a register
-// - interval->isSpilled is set to true if the interval is EVER spilled
+// - interval->isSpilled should have already been set to true if the interval is EVER spilled
// - interval->isSplit is set to true if the interval does not occupy the same
// register throughout the method
// - RegRecord->assignedInterval points to the interval which currently occupies
@@ -7264,9 +7666,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
if (currentRefPosition->registerAssignment == RBM_NONE)
{
assert(!currentRefPosition->RequiresRegister());
+ assert(interval->isSpilled);
- interval->isSpilled = true;
- varDsc->lvRegNum = REG_STK;
+ varDsc->lvRegNum = REG_STK;
if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
{
interval->assignedReg->assignedInterval = nullptr;
@@ -7314,8 +7716,10 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
// In the reload case we simply do not set GTF_REG_VAL, and it gets
// referenced from the variable's home location.
// This is also true for a pure def which is spilled.
- if (reload && currentRefPosition->refType != RefTypeDef)
+ if (reload)
{
+ assert(currentRefPosition->refType != RefTypeDef);
+ assert(interval->isSpilled);
varDsc->lvRegNum = REG_STK;
if (!spillAfter)
{
@@ -7353,31 +7757,15 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
{
assert(currentRefPosition->refType == RefTypeExpUse);
}
-
- // If we have an undefined use set it as non-reg
- if (!interval->isSpilled)
- {
- if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition)
- {
- // Parameters are the only thing that can be used before defined
- }
- else
- {
- // if we see a use before def of something else, the zero init flag better not be set.
- noway_assert(!compiler->info.compInitMem);
- // if it is not set, then the behavior is undefined but we don't want to crash or assert
- interval->isSpilled = true;
- }
- }
}
else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
{
// In the case of a pure def, don't bother spilling - just assign it to the
// stack. However, we need to remember that it was spilled.
- interval->isSpilled = true;
- varDsc->lvRegNum = REG_STK;
- interval->physReg = REG_NA;
+ assert(interval->isSpilled);
+ varDsc->lvRegNum = REG_STK;
+ interval->physReg = REG_NA;
if (treeNode != nullptr)
{
treeNode->gtRegNum = REG_NA;
@@ -7409,6 +7797,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
}
else
{
+ assert(interval->isSplit);
interval->physReg = assignedReg;
}
@@ -7426,13 +7815,11 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
{
if (varDsc->lvRegNum != REG_STK)
{
- // If the register assignments don't match, then this interval is spilt,
- // but not spilled (yet)
- // However, we don't have a single register assignment now
+ // If the register assignments don't match, then this interval is split.
if (varDsc->lvRegNum != assignedReg)
{
- interval->isSplit = TRUE;
- varDsc->lvRegNum = REG_STK;
+ setIntervalAsSplit(interval);
+ varDsc->lvRegNum = REG_STK;
}
}
else
@@ -7447,9 +7834,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
{
treeNode->gtFlags |= GTF_SPILL;
}
- interval->isSpilled = true;
- interval->physReg = REG_NA;
- varDsc->lvRegNum = REG_STK;
+ assert(interval->isSpilled);
+ interval->physReg = REG_NA;
+ varDsc->lvRegNum = REG_STK;
}
// This value is in a register, UNLESS we already saw this treeNode
@@ -7489,6 +7876,7 @@ void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
// than the one it was spilled from (GT_RELOAD).
//
// Arguments:
+// block - basic block in which GT_COPY/GT_RELOAD is inserted.
// tree - This is the node to copy or reload.
// Insert copy or reload node between this node and its parent.
// multiRegIdx - register position of tree node for which copy or reload is needed.
@@ -7557,6 +7945,10 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned
else
{
oper = GT_COPY;
+
+#if TRACK_LSRA_STATS
+ updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum);
+#endif
}
// If the parent is a reload/copy node, then tree must be a multi-reg call node
@@ -8100,7 +8492,7 @@ void LinearScan::resolveRegisters()
{
JITDUMP(" internal");
GenTreePtr indNode = nullptr;
- if (treeNode->OperIsIndir())
+ if (treeNode->OperGet() == GT_IND)
{
indNode = treeNode;
JITDUMP(" allocated at GT_IND");
@@ -8223,6 +8615,11 @@ void LinearScan::resolveRegisters()
printf("RESOLVING BB BOUNDARIES\n");
printf("-----------------------\n");
+ printf("Resolution Candidates: ");
+ dumpConvertedVarSet(compiler, resolutionCandidateVars);
+ printf("\n");
+ printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No");
+
printf("Prior to Resolution\n");
foreach_block(compiler, block)
{
@@ -8282,23 +8679,10 @@ void LinearScan::resolveRegisters()
varDsc->lvArgInitReg = initialReg;
JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
}
- if (!varDsc->lvIsRegArg)
- {
- // stack arg
- if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
- {
- if (sourceReg != initialReg)
- {
- // The code generator won't initialize struct
- // fields, so we have to do that if it's not already
- // where it belongs.
- assert(interval->isStructField);
- JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg),
- getRegName(initialReg));
- insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg);
- }
- }
- }
+
+ // Stack args that are part of dependently-promoted structs should never be register candidates (see
+ // LinearScan::isRegCandidate).
+ assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc));
}
// If lvRegNum is REG_STK, that means that either no register
@@ -8347,8 +8731,8 @@ void LinearScan::resolveRegisters()
}
if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
{
- // Either this RefPosition is spilled, or it is not a "real" def or use
- assert(firstRefPosition->spillAfter ||
+ // Either this RefPosition is spilled, or regOptional or it is not a "real" def or use
+ assert(firstRefPosition->spillAfter || firstRefPosition->AllocateIfProfitable() ||
(firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
varDsc->lvRegNum = REG_STK;
}
@@ -8432,6 +8816,8 @@ void LinearScan::insertMove(
BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
{
LclVarDsc* varDsc = compiler->lvaTable + lclNum;
+ // the lclVar must be a register candidate
+ assert(isRegCandidate(varDsc));
// One or both MUST be a register
assert(fromReg != REG_STK || toReg != REG_STK);
// They must not be the same register.
@@ -8440,20 +8826,22 @@ void LinearScan::insertMove(
// This var can't be marked lvRegister now
varDsc->lvRegNum = REG_STK;
- var_types lclTyp = varDsc->TypeGet();
- if (varDsc->lvNormalizeOnStore())
- {
- lclTyp = genActualType(lclTyp);
- }
- GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp);
+ GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
src->gtLsraInfo.isLsraAdded = true;
- GenTreePtr top;
- // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED
- // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL
- // Finally, for a reg-to-reg move, generate a GT_COPY
+ // There are three cases we need to handle:
+ // - We are loading a lclVar from the stack.
+ // - We are storing a lclVar to the stack.
+ // - We are copying a lclVar between registers.
+ //
+ // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively.
+ // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the
+ // lclVar's value.
+ //
+ // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar.
+ // This is safe because a lclVar is always normalized once it is in a register.
- top = src;
+ GenTree* dst = src;
if (fromReg == REG_STK)
{
src->gtFlags |= GTF_SPILLED;
@@ -8467,21 +8855,22 @@ void LinearScan::insertMove(
}
else
{
- top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src);
+ var_types movType = genActualType(varDsc->TypeGet());
+ src->gtType = movType;
+
+ dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src);
// This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
// Note that if src is itself a lastUse, this will have no effect.
- top->gtFlags &= ~(GTF_VAR_DEATH);
+ dst->gtFlags &= ~(GTF_VAR_DEATH);
src->gtRegNum = fromReg;
src->SetInReg();
- top->gtRegNum = toReg;
- src->gtNext = top;
- top->gtPrev = src;
+ dst->gtRegNum = toReg;
src->gtLsraInfo.isLocalDefUse = false;
- top->gtLsraInfo.isLsraAdded = true;
+ dst->gtLsraInfo.isLsraAdded = true;
}
- top->gtLsraInfo.isLocalDefUse = true;
+ dst->gtLsraInfo.isLocalDefUse = true;
- LIR::Range treeRange = LIR::SeqTree(compiler, top);
+ LIR::Range treeRange = LIR::SeqTree(compiler, dst);
LIR::Range& blockRange = LIR::AsRange(block);
if (insertionPoint != nullptr)
@@ -8497,7 +8886,7 @@ void LinearScan::insertMove(
noway_assert(!blockRange.IsEmpty());
GenTree* branch = blockRange.LastNode();
- assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
branch->OperGet() == GT_SWITCH);
blockRange.InsertBefore(branch, std::move(treeRange));
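
A standalone sketch of the three move shapes described in the comment inside insertMove above (load from the stack, store to the stack, register-to-register copy); the enum, the stack pseudo-register, and the helper are illustrative stand-ins, not JIT types:

// Hypothetical classification of a resolution move; REG_STK and the flag names in the
// comments refer to the JIT concepts above, but nothing here is the real implementation.
#include <cstdio>

enum class MoveKind { LoadFromStack, StoreToStack, RegToRegCopy };

MoveKind classifyResolutionMove(int fromReg, int toReg, int stackPseudoReg)
{
    if (fromReg == stackPseudoReg)
    {
        return MoveKind::LoadFromStack; // lclVar node marked GTF_SPILLED
    }
    if (toReg == stackPseudoReg)
    {
        return MoveKind::StoreToStack;  // lclVar node marked GTF_SPILL
    }
    return MoveKind::RegToRegCopy;      // GT_COPY(GT_LCL_VAR) typed with the normalized type
}

int main()
{
    const int REG_STK_SKETCH = -1;
    // A store to the stack: prints 1 (MoveKind::StoreToStack).
    std::printf("%d\n", (int)classifyResolutionMove(2, REG_STK_SKETCH, REG_STK_SKETCH));
    return 0;
}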
@@ -8568,7 +8957,7 @@ void LinearScan::insertSwap(
noway_assert(!blockRange.IsEmpty());
GenTree* branch = blockRange.LastNode();
- assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
branch->OperGet() == GT_SWITCH);
blockRange.InsertBefore(branch, std::move(swapRange));
@@ -8682,12 +9071,15 @@ void LinearScan::addResolution(
insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
if (fromReg == REG_STK || toReg == REG_STK)
{
- interval->isSpilled = true;
+ assert(interval->isSpilled);
}
else
{
- interval->isSplit = true;
+ // We should have already marked this as spilled or split.
+ assert((interval->isSpilled) || (interval->isSplit));
}
+
+ INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
}
//------------------------------------------------------------------------
@@ -8706,6 +9098,12 @@ void LinearScan::addResolution(
void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
{
+ VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet,
+ VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars));
+ if (VarSetOps::IsEmpty(compiler, outResolutionSet))
+ {
+ return;
+ }
VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler));
VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler));
VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler));
@@ -8720,6 +9118,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
// First, determine the live regs at the end of this block so that we know what regs are
// available to copy into.
+ // Note that for this purpose we use the full live-out set, because we must ensure that
+ // even the registers that remain the same across the edge are preserved correctly.
regMaskTP liveOutRegs = RBM_NONE;
VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1);
while (iter1.NextElem(compiler, &varIndex1))
@@ -8755,7 +9155,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
regMaskTP sameWriteRegs = RBM_NONE;
regMaskTP diffReadRegs = RBM_NONE;
- // For each var, classify them as:
+ // For each var that may require resolution, classify them as:
// - in the same register at the end of this block and at each target (no resolution needed)
// - in different registers at different targets (resolve separately):
// diffResolutionSet
@@ -8764,7 +9164,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
// write to any registers that are read by those in the diffResolutionSet:
// sameResolutionSet
- VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex);
+ VARSET_ITER_INIT(compiler, iter, outResolutionSet, varIndex);
while (iter.NextElem(compiler, &varIndex))
{
unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
@@ -8936,6 +9336,16 @@ void LinearScan::resolveEdges()
{
JITDUMP("RESOLVING EDGES\n");
+ // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to
+ // any block. We now intersect that set with any lclVars that were ever spilled or split.
+ // If there are no candidates for resolution, simply return.
+
+ VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars);
+ if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars))
+ {
+ return;
+ }
+
BasicBlock *block, *prevBlock = nullptr;
// Handle all the critical edges first.
@@ -8944,18 +9354,21 @@ void LinearScan::resolveEdges()
// remaining mismatches. We visit the out-edges, as that allows us to share the moves that are
// common among all the targets.
- foreach_block(compiler, block)
+ if (hasCriticalEdges)
{
- if (block->bbNum > bbNumMaxBeforeResolution)
- {
- // This is a new block added during resolution - we don't need to visit these now.
- continue;
- }
- if (blockInfo[block->bbNum].hasCriticalOutEdge)
+ foreach_block(compiler, block)
{
- handleOutgoingCriticalEdges(block);
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a new block added during resolution - we don't need to visit these now.
+ continue;
+ }
+ if (blockInfo[block->bbNum].hasCriticalOutEdge)
+ {
+ handleOutgoingCriticalEdges(block);
+ }
+ prevBlock = block;
}
- prevBlock = block;
}
prevBlock = nullptr;
@@ -8975,7 +9388,9 @@ void LinearScan::resolveEdges()
// we may need resolution at the beginning of this block.
// This may be true even if it's the block we used for starting locations,
// if a variable was spilled.
- if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn))
+ VARSET_TP VARSET_INIT_NOCOPY(inResolutionSet,
+ VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars));
+ if (!VarSetOps::IsEmpty(compiler, inResolutionSet))
{
if (uniquePredBlock != nullptr)
{
@@ -8988,7 +9403,7 @@ void LinearScan::resolveEdges()
uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
noway_assert(uniquePredBlock != nullptr);
}
- resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn);
+ resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet);
}
}
@@ -9003,7 +9418,12 @@ void LinearScan::resolveEdges()
BasicBlock* succBlock = block->GetSucc(0, compiler);
if (succBlock->GetUniquePred(compiler) == nullptr)
{
- resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn);
+ VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet, VarSetOps::Intersection(compiler, succBlock->bbLiveIn,
+ resolutionCandidateVars));
+ if (!VarSetOps::IsEmpty(compiler, outResolutionSet))
+ {
+ resolveEdge(block, succBlock, ResolveJoin, outResolutionSet);
+ }
}
}
}
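
A small self-contained sketch of the "skip the edge when nothing live across it is a resolution candidate" filtering used above, with std::bitset standing in for the JIT's VARSET_TP/VarSetOps (all names are illustrative):

// Illustrative only: a stand-in for VarSetOps::Intersection / IsEmpty over tracked-variable sets.
#include <bitset>
#include <cstdio>

constexpr size_t kTrackedVarCount = 64;
using VarSet = std::bitset<kTrackedVarCount>;

bool needsResolution(const VarSet& liveAcrossEdge, const VarSet& resolutionCandidates)
{
    // Resolution is only needed when at least one live var ever spilled or split.
    return (liveAcrossEdge & resolutionCandidates).any();
}

int main()
{
    VarSet liveIn, candidates;
    liveIn.set(5);
    candidates.set(7); // nothing live across this edge was ever spilled or split
    std::printf("resolve edge: %s\n", needsResolution(liveIn, candidates) ? "yes" : "no");
    return 0;
}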
@@ -9161,6 +9581,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// in resolveEdges(), after all the edge resolution has been done (by calling this
// method for each edge).
block = compiler->fgSplitEdge(fromBlock, toBlock);
+
+ // Split edges are counted against fromBlock.
+ INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum));
break;
default:
unreached();
@@ -9347,11 +9770,13 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
{
useSwap = true;
}
-#else // !_TARGET_XARCH_
+#else // !_TARGET_XARCH_
+
else
{
tempReg = tempRegInt;
}
+
#endif // !_TARGET_XARCH_
if (useSwap || tempReg == REG_NA)
{
@@ -9396,6 +9821,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
sourceIntervals[sourceReg]->varNum, fromReg);
location[sourceReg] = REG_NA;
location[source[otherTargetReg]] = (regNumberSmall)fromReg;
+
+ INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
}
else
{
@@ -9406,6 +9833,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock,
// First, spill "otherInterval" from targetReg to the stack.
Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
+ setIntervalAsSpilled(otherInterval);
addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
location[source[otherTargetReg]] = REG_STK;
@@ -9527,6 +9955,126 @@ void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
internalCandsIndex = (unsigned char)i;
}
+#if TRACK_LSRA_STATS
+// ----------------------------------------------------------
+// updateLsraStat: Increment LSRA stat counter.
+//
+// Arguments:
+// stat - LSRA stat enum
+// bbNum - Basic block with which the LSRA stat needs to be
+// associated.
+//
+void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
+{
+ if (bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a newly created basic block as part of resolution.
+ // These blocks contain resolution moves that are already accounted for.
+ return;
+ }
+
+ switch (stat)
+ {
+ case LSRA_STAT_SPILL:
+ ++(blockInfo[bbNum].spillCount);
+ break;
+
+ case LSRA_STAT_COPY_REG:
+ ++(blockInfo[bbNum].copyRegCount);
+ break;
+
+ case LSRA_STAT_RESOLUTION_MOV:
+ ++(blockInfo[bbNum].resolutionMovCount);
+ break;
+
+ case LSRA_STAT_SPLIT_EDGE:
+ ++(blockInfo[bbNum].splitEdgeCount);
+ break;
+
+ default:
+ break;
+ }
+}
+
+// -----------------------------------------------------------
+// dumpLsraStats - dumps LSRA stats to the given file.
+//
+// Arguments:
+// file - file to which stats are to be written.
+//
+void LinearScan::dumpLsraStats(FILE* file)
+{
+ unsigned sumSpillCount = 0;
+ unsigned sumCopyRegCount = 0;
+ unsigned sumResolutionMovCount = 0;
+ unsigned sumSplitEdgeCount = 0;
+ UINT64 wtdSpillCount = 0;
+ UINT64 wtdCopyRegCount = 0;
+ UINT64 wtdResolutionMovCount = 0;
+
+ fprintf(file, "----------\n");
+ fprintf(file, "LSRA Stats");
+#ifdef DEBUG
+ if (!VERBOSE)
+ {
+ fprintf(file, " : %s\n", compiler->info.compFullName);
+ }
+ else
+ {
+ // In verbose mode there is no need to print the full name
+ // while printing LSRA stats.
+ fprintf(file, "\n");
+ }
+#else
+ fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd));
+#endif
+
+ fprintf(file, "----------\n");
+
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ continue;
+ }
+
+ unsigned spillCount = blockInfo[block->bbNum].spillCount;
+ unsigned copyRegCount = blockInfo[block->bbNum].copyRegCount;
+ unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount;
+ unsigned splitEdgeCount = blockInfo[block->bbNum].splitEdgeCount;
+
+ if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0)
+ {
+ fprintf(file, "BB%02u [%8d]: ", block->bbNum, block->bbWeight);
+ fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount,
+ resolutionMovCount, splitEdgeCount, copyRegCount);
+ }
+
+ sumSpillCount += spillCount;
+ sumCopyRegCount += copyRegCount;
+ sumResolutionMovCount += resolutionMovCount;
+ sumSplitEdgeCount += splitEdgeCount;
+
+ wtdSpillCount += (UINT64)spillCount * block->bbWeight;
+ wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
+ wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
+ }
+
+ fprintf(file, "Total Spill Count: %d Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
+ fprintf(file, "Total CopyReg Count: %d Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
+ fprintf(file, "Total ResolutionMov Count: %d Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
+ fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
+
+ // compute total number of spill temps created
+ unsigned numSpillTemps = 0;
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ numSpillTemps += maxSpill[i];
+ }
+ fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps);
+}
+#endif // TRACK_LSRA_STATS
+
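A tiny standalone sketch of the block-weight scaling that dumpLsraStats applies when it folds per-block counts into weighted totals; the struct and field names below are invented for illustration:

// Hypothetical data shape; in the JIT these counts live in LsraBlockInfo indexed by bbNum.
#include <cstdint>
#include <cstdio>
#include <vector>

struct BlockStats
{
    unsigned weight;     // analogous to bbWeight
    unsigned spillCount; // raw count of spills recorded for the block
};

int main()
{
    std::vector<BlockStats> blocks = {{100, 2}, {900, 1}, {1, 5}};
    unsigned totalSpills    = 0;
    uint64_t weightedSpills = 0;
    for (const BlockStats& b : blocks)
    {
        totalSpills    += b.spillCount;                     // unweighted total
        weightedSpills += (uint64_t)b.spillCount * b.weight; // scaled by block weight
    }
    std::printf("Total Spill Count: %u Weighted: %llu\n", totalSpills, (unsigned long long)weightedSpills);
    return 0;
}
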
#ifdef DEBUG
void dumpRegMask(regMaskTP regs)
{
@@ -9645,6 +10193,11 @@ void RefPosition::dump()
{
printf(" outOfOrder");
}
+
+ if (this->AllocateIfProfitable())
+ {
+ printf(" regOptional");
+ }
printf(">\n");
}
@@ -11329,9 +11882,18 @@ void LinearScan::verifyFinalAllocation()
{
if (VERBOSE)
{
+ // If refPos is marked as copyReg, then the reg that is spilled
+ // is the homeReg of the interval not the reg currently assigned
+ // to refPos.
+ regNumber spillReg = regNum;
+ if (currentRefPosition->copyReg)
+ {
+ assert(interval != nullptr);
+ spillReg = interval->physReg;
+ }
dumpRegRecords();
dumpEmptyRefPosition();
- printf("Spill %-4s ", getRegName(regNum));
+ printf("Spill %-4s ", getRegName(spillReg));
}
}
else if (currentRefPosition->copyReg)
@@ -11392,15 +11954,14 @@ void LinearScan::verifyFinalAllocation()
interval->physReg = REG_NA;
interval->assignedReg = nullptr;
- // regRegcord could be null if RefPosition is to be allocated a
- // reg only if profitable.
+ // regRecord could be null if the RefPosition does not require a register.
if (regRecord != nullptr)
{
regRecord->assignedInterval = nullptr;
}
else
{
- assert(currentRefPosition->AllocateIfProfitable());
+ assert(!currentRefPosition->RequiresRegister());
}
}
}
@@ -11506,6 +12067,8 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr
assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
leftInterval->physReg = rightRegNum;
rightInterval->physReg = leftRegNum;
+ leftInterval->assignedReg = &physRegs[rightRegNum];
+ rightInterval->assignedReg = &physRegs[leftRegNum];
physRegs[rightRegNum].assignedInterval = leftInterval;
physRegs[leftRegNum].assignedInterval = rightInterval;
if (VERBOSE)
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index a3c41fe1e3..c8a3fb4e24 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -73,6 +73,25 @@ struct LsraBlockInfo
unsigned int predBBNum;
bool hasCriticalInEdge;
bool hasCriticalOutEdge;
+
+#if TRACK_LSRA_STATS
+ // Per block maintained LSRA statistics.
+
+ // Number of spills of local vars or tree temps in this basic block.
+ unsigned spillCount;
+
+ // Number of GT_COPY nodes inserted in this basic block while allocating regs.
+ // Note that GT_COPY nodes are also inserted as part of basic block boundary
+ // resolution; those are counted against resolutionMovCount, not
+ // against copyRegCount.
+ unsigned copyRegCount;
+
+ // Number of resolution moves inserted in this basic block.
+ unsigned resolutionMovCount;
+
+ // Number of critical edges from this block that are split.
+ unsigned splitEdgeCount;
+#endif // TRACK_LSRA_STATS
};
// This is sort of a bit mask
@@ -504,6 +523,8 @@ private:
{
return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
}
+
+ regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount);
regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
// This controls the heuristics used to select registers
@@ -572,7 +593,7 @@ private:
regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
// This controls whether we always insert a GT_RELOAD instruction after a spill
- // Note that this can be combined with LsraSpillAlways (or not)
+ // Note that this can be combined with LSRA_SPILL_ALWAYS (or not)
enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
LsraReload getLsraReload()
{
@@ -769,11 +790,19 @@ private:
regMaskTP getDefCandidates(GenTree* tree);
var_types getDefType(GenTree* tree);
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, regMaskTP regMask);
+ RefPosition* defineNewInternalTemp(GenTree* tree,
+ RegisterType regType,
+ LsraLocation currentLoc,
+ regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount));
- int buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[]);
+ int buildInternalRegisterDefsForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount));
- void buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], int total);
+ void buildInternalRegisterUsesForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* defs[],
+ int total DEBUGARG(unsigned minRegCandidateCount));
void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition);
@@ -824,7 +853,7 @@ private:
RefType theRefType,
GenTree* theTreeNode,
regMaskTP mask,
- unsigned multiRegIdx = 0);
+ unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1));
RefPosition* newRefPosition(
regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
@@ -864,6 +893,8 @@ private:
unassignPhysReg(getRegisterRecord(reg), nullptr);
}
+ void setIntervalAsSpilled(Interval* interval);
+ void setIntervalAsSplit(Interval* interval);
void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
void spillGCRefs(RefPosition* killRefPosition);
@@ -936,11 +967,8 @@ private:
char* operandString,
unsigned operandStringLength);
void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
- void DumpOperandDefs(GenTree* operand,
- bool& first,
- LsraTupleDumpMode mode,
- char* operandString,
- const unsigned operandStringLength);
+ void DumpOperandDefs(
+ GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength);
void TupleStyleDump(LsraTupleDumpMode mode);
bool dumpTerse;
@@ -1020,6 +1048,20 @@ private:
void validateIntervals();
#endif // DEBUG
+#if TRACK_LSRA_STATS
+ enum LsraStat{
+ LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
+ };
+
+ void updateLsraStat(LsraStat stat, unsigned currentBBNum);
+
+ void dumpLsraStats(FILE* file);
+
+#define INTRACK_STATS(x) x
+#else // !TRACK_LSRA_STATS
+#define INTRACK_STATS(x)
+#endif // !TRACK_LSRA_STATS
+
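A generic, self-contained sketch of the compile-away instrumentation idiom behind INTRACK_STATS; the TRACK_DEMO_STATS switch and the counter below are made up for illustration:

// When the switch is off, the macro expands to nothing and the statements vanish entirely.
#include <cstdio>

#define TRACK_DEMO_STATS 1

#if TRACK_DEMO_STATS
#define INTRACK_DEMO(x) x
#else
#define INTRACK_DEMO(x)
#endif

static unsigned g_spills = 0;

int main()
{
    INTRACK_DEMO(++g_spills); // compiles to nothing when tracking is disabled
    INTRACK_DEMO(std::printf("spills=%u\n", g_spills));
    return 0;
}
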
Compiler* compiler;
private:
@@ -1066,6 +1108,10 @@ private:
return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
}
+#if DOUBLE_ALIGN
+ bool doDoubleAlign;
+#endif
+
// A map from bbNum to the block information used during register allocation.
LsraBlockInfo* blockInfo;
BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
@@ -1092,6 +1138,8 @@ private:
unsigned int bbSeqCount;
// The Location of the start of the current block.
LsraLocation curBBStartLocation;
+ // True if the method contains any critical edges.
+ bool hasCriticalEdges;
// Ordered list of RefPositions
RefPositionList refPositions;
@@ -1111,6 +1159,12 @@ private:
// Current set of live tracked vars, used during building of RefPositions to determine whether
// to preference to callee-save
VARSET_TP currentLiveVars;
+ // Set of variables that may require resolution across an edge.
+ // This is first constructed during interval building, to contain all the lclVars that are live at BB edges.
+ // Then, any lclVar that is always in the same register is removed from the set.
+ VARSET_TP resolutionCandidateVars;
+ // This set contains all the lclVars that are ever spilled or split.
+ VARSET_TP splitOrSpilledVars;
// Set of floating point variables to consider for callee-save registers.
VARSET_TP fpCalleeSaveCandidateVars;
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
@@ -1382,6 +1436,7 @@ public:
, delayRegFree(false)
, outOfOrder(false)
#ifdef DEBUG
+ , minRegCandidateCount(1)
, rpNum(0)
#endif
{
@@ -1555,9 +1610,15 @@ public:
}
#ifdef DEBUG
- unsigned rpNum; // The unique RefPosition number, equal to its index in the refPositions list. Only used for
- // debugging dumps.
-#endif // DEBUG
+ // Minimum number of registers that need to be ensured while
+ // constraining candidates for this ref position under
+ // LSRA stress.
+ unsigned minRegCandidateCount;
+
+ // The unique RefPosition number, equal to its index in the
+ // refPositions list. Only used for debugging dumps.
+ unsigned rpNum;
+#endif // DEBUG
bool isIntervalRef()
{
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 00df17baa0..678bb34c54 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -204,6 +204,9 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
{
case TYP_INT:
#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
+#ifdef LEGACY_BACKEND
+ // the RyuJIT backend does not use the x87 FPU and therefore
+ // does not support folding the cast conv.i4(round.d(d))
if ((oper->gtOper == GT_INTRINSIC) &&
(oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
{
@@ -212,7 +215,9 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
return fgMorphTree(oper);
}
// if SSE2 is not enabled, we need the helper
- else if (!opts.compCanUseSSE2)
+ else
+#endif // LEGACY_BACKEND
+ if (!opts.compCanUseSSE2)
{
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
}
@@ -360,8 +365,17 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
tree->gtFlags &= ~GTF_UNSIGNED;
+#ifndef LEGACY_BACKEND
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
+#endif
}
}
+#ifndef LEGACY_BACKEND
+ else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
+ }
+#endif
#endif //_TARGET_XARCH_
else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
{
@@ -1010,12 +1024,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
{
/* Get hold of the next argument values for the oldCall and newCall */
- assert(newArgs->IsList());
+ assert(newArgs->OperIsList());
newCurr = newArgs->Current();
newArgs = newArgs->Rest();
- assert(oldArgs->IsList());
+ assert(oldArgs->OperIsList());
oldCurr = oldArgs->Current();
oldArgs = oldArgs->Rest();
@@ -1047,6 +1061,8 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
argCount = oldArgInfo->argCount;
nextSlotNum = oldArgInfo->nextSlotNum;
+ hasRegArgs = oldArgInfo->hasRegArgs;
+ hasStackArgs = oldArgInfo->hasStackArgs;
argsComplete = true;
argsSorted = true;
}
@@ -1188,7 +1204,7 @@ fgArgTabEntry* fgArgInfo::RemorphRegArg(
GenTreePtr argx;
if (curArgTabEntry->parent != nullptr)
{
- assert(curArgTabEntry->parent->IsList());
+ assert(curArgTabEntry->parent->OperIsList());
argx = curArgTabEntry->parent->Current();
isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
}
@@ -1255,7 +1271,7 @@ void fgArgInfo::RemorphStkArg(
if (curArgTabEntry->parent != nullptr)
{
- assert(curArgTabEntry->parent->IsList());
+ assert(curArgTabEntry->parent->OperIsList());
argx = curArgTabEntry->parent->Current();
isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
}
@@ -1283,7 +1299,7 @@ void fgArgInfo::RemorphStkArg(
assert(curArgTabEntry->numSlots == numSlots);
assert(curArgTabEntry->alignment == alignment);
assert(curArgTabEntry->parent == parent);
- assert(parent->IsList());
+ assert(parent->OperIsList());
#if FEATURE_FIXED_OUT_ARGS
if (curArgTabEntry->node != node)
@@ -1512,7 +1528,7 @@ void fgArgInfo::ArgsComplete()
#ifndef LEGACY_BACKEND
#if FEATURE_MULTIREG_ARGS
- // For RyuJIT backend we will expand a Multireg arg into a GT_LIST
+ // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST
// with multiple indirections, so here we consider spilling it into a tmp LclVar.
//
// Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS
@@ -2364,7 +2380,7 @@ void fgArgInfo::EvalArgsToTemps()
{
GenTreePtr parent = curArgTabEntry->parent;
/* a normal argument from the list */
- noway_assert(parent->IsList());
+ noway_assert(parent->OperIsList());
noway_assert(parent->gtOp.gtOp1 == argx);
parent->gtOp.gtOp1 = setupArg;
@@ -2387,7 +2403,7 @@ void fgArgInfo::EvalArgsToTemps()
}
else
{
- noway_assert(tmpRegArgNext->IsList());
+ noway_assert(tmpRegArgNext->OperIsList());
noway_assert(tmpRegArgNext->Current());
tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
tmpRegArgNext = tmpRegArgNext->Rest();
@@ -2603,7 +2619,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
unsigned argSlots = 0;
unsigned nonRegPassedStructSlots = 0;
- bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
+ bool reMorphing = call->AreArgsComplete();
bool callHasRetBuffArg = call->HasRetBufArg();
#ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
@@ -2731,7 +2747,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// Process the late arguments (which were determined by a previous caller).
// Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
// may need to refer to it.
- if (lateArgsComputed)
+ if (reMorphing)
{
// We need to reMorph the gtCallLateArgs early since that is what triggers
// the expression folding and we need to have the final folded gtCallLateArgs
@@ -2745,14 +2761,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
//
// Since the late arguments are evaluated last we have pushed all of the
// other arguments on the stack before we evaluate these late arguments,
- // so we record the stack depth on the first morph call when lateArgsComputed
+ // so we record the stack depth on the first morph call when reMorphing
// was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
//
unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
- fgPtrArgCntCur += callStkLevel;
- call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
- flagsSummary |= call->gtCallLateArgs->gtFlags;
- fgPtrArgCntCur -= callStkLevel;
+ if (call->gtCallLateArgs != nullptr)
+ {
+ fgPtrArgCntCur += callStkLevel;
+ call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
+ flagsSummary |= call->gtCallLateArgs->gtFlags;
+ fgPtrArgCntCur -= callStkLevel;
+ }
assert(call->fgArgInfo != nullptr);
call->fgArgInfo->RemorphReset();
@@ -2780,7 +2799,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// *********** END NOTE *********
CLANG_FORMAT_COMMENT_ANCHOR;
-#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+#if !defined(LEGACY_BACKEND)
+#if defined(_TARGET_X86_)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
// correctly here.
if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
@@ -2792,21 +2812,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
// The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
// hi part to be in EDX. This sets the argument registers up correctly.
- else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || call->IsHelperCall(this, CORINFO_HELP_LRSZ))
+ else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
+ call->IsHelperCall(this, CORINFO_HELP_LRSZ))
{
GenTreeArgList* args = call->gtCallArgs;
- GenTree* arg1 = args->Current();
+ GenTree* arg1 = args->Current();
assert(arg1 != nullptr);
nonStandardArgs.Add(arg1, REG_LNGARG_LO);
- args = args->Rest();
+ args = args->Rest();
GenTree* arg2 = args->Current();
assert(arg2 != nullptr);
nonStandardArgs.Add(arg2, REG_LNGARG_HI);
}
-#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
-
-#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+#else // !defined(_TARGET_X86_)
// TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
// If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
// convention for x86/SSE.
@@ -2817,7 +2836,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
args = call->gtCallArgs;
assert(args != nullptr);
- assert(args->IsList());
+ assert(args->OperIsList());
argx = call->gtCallArgs->Current();
@@ -2871,21 +2890,32 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
}
- else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
+ else
+#endif // defined(_TARGET_X86_)
+ if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
{
assert(!call->IsUnmanaged());
- // put cookie into R11
GenTree* arg = call->gtCallCookie;
noway_assert(arg != nullptr);
call->gtCallCookie = nullptr;
+#if defined(_TARGET_X86_)
+ // x86 passes the cookie on the stack as the final argument to the call.
+ GenTreeArgList** insertionPoint = &call->gtCallArgs;
+ for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest())
+ {
+ }
+ *insertionPoint = gtNewListNode(arg, nullptr);
+#else // !defined(_TARGET_X86_)
+ // All other architectures pass the cookie in a register.
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
- numArgs++;
+#endif // defined(_TARGET_X86_)
nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
+ numArgs++;
- // put destination into R10
+ // put destination into R10/EAX
arg = gtClone(call->gtCallAddr, true);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
@@ -2896,7 +2926,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
call->gtCallType = CT_HELPER;
call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
}
-#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+#endif // !defined(LEGACY_BACKEND)
// Allocate the fgArgInfo for the call node;
//
@@ -2929,7 +2959,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
/* We must fill in or update the argInfo table */
- if (lateArgsComputed)
+ if (reMorphing)
{
/* this is a register argument - possibly update it in the table */
call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
@@ -3075,7 +3105,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
*parentArgx = argx;
flagsSummary |= argx->gtFlags;
- assert(args->IsList());
+ assert(args->OperIsList());
assert(argx == args->Current());
#ifndef LEGACY_BACKEND
@@ -3114,13 +3144,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
compFloatingPointUsed = true;
}
- unsigned size = 0;
- CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
- bool isRegArg = false;
+ unsigned size = 0;
+ CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
+ bool isRegArg = false;
+ bool isNonStandard = false;
+ regNumber nonStdRegNum = REG_NA;
fgArgTabEntryPtr argEntry = nullptr;
- if (lateArgsComputed)
+ if (reMorphing)
{
argEntry = gtArgEntryByArgNum(call, argIndex);
}
@@ -3128,7 +3160,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#ifdef _TARGET_ARM_
bool passUsingIntRegs;
- if (lateArgsComputed)
+ if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
@@ -3179,7 +3211,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#elif defined(_TARGET_ARM64_)
- if (lateArgsComputed)
+ if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
}
@@ -3189,8 +3221,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#elif defined(_TARGET_AMD64_)
-#if defined(UNIX_AMD64_ABI)
- if (lateArgsComputed)
+ if (reMorphing)
{
passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
}
@@ -3198,9 +3229,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
passUsingFloatRegs = varTypeIsFloating(argx);
}
-#else // WINDOWS_AMD64_ABI
- passUsingFloatRegs = varTypeIsFloating(argx);
-#endif // !UNIX_AMD64_ABI
#elif defined(_TARGET_X86_)
passUsingFloatRegs = false;
@@ -3216,7 +3244,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
bool isStructArg = varTypeIsStruct(argx);
- if (lateArgsComputed)
+ if (reMorphing)
{
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
// Get the struct description for the already completed struct argument.
@@ -3260,7 +3288,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// This size has now been computed
assert(size != 0);
}
- else // !lateArgsComputed
+ else // !reMorphing
{
//
// Figure out the size of the argument. This is either in number of registers, or number of
@@ -3287,7 +3315,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
}
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
- size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#elif defined(_TARGET_ARM64_)
if (isStructArg)
@@ -3379,7 +3407,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
GenTreePtr argObj = argx;
GenTreePtr* parentOfArgObj = parentArgx;
- assert(args->IsList());
+ assert(args->OperIsList());
assert(argx == args->Current());
/* The GT_OBJ may be be a child of a GT_COMMA */
@@ -3686,11 +3714,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// the obj reading memory past the end of the valuetype
CLANG_FORMAT_COMMENT_ANCHOR;
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
- copyBlkClass = objClass;
- size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
-#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
if (roundupSize > originalSize)
{
copyBlkClass = objClass;
@@ -3705,7 +3728,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
-#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
}
}
}
@@ -3841,7 +3863,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
}
#else // !defined(UNIX_AMD64_ABI)
- isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
#endif // !defined(UNIX_AMD64_ABI)
#endif // _TARGET_ARM_
}
@@ -3850,8 +3872,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
isRegArg = false;
}
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- if (call->IsTailCallViaHelper())
+#ifndef LEGACY_BACKEND
+ // If there are nonstandard args (outside the calling convention) they were inserted above
+ // and noted in a table so we can recognize them here and build their argInfo.
+ //
+ // They should not affect the placement of any other args or stack space required.
+ // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
+ isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum);
+ if (isNonStandard && (nonStdRegNum == REG_STK))
+ {
+ isRegArg = false;
+ }
+#if defined(_TARGET_X86_)
+ else if (call->IsTailCallViaHelper())
{
// We have already (before calling fgMorphArgs()) appended the 4 special args
// required by the x86 tailcall helper. These args are required to go on the
@@ -3862,9 +3895,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
isRegArg = false;
}
}
-#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
-
- } // end !lateArgsComputed
+#endif // defined(_TARGET_X86_)
+#endif // !LEGACY_BACKEND
+ } // end !reMorphing
//
// Now we know if the argument goes in registers or not and how big it is,
@@ -3943,23 +3976,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#endif
fgArgTabEntryPtr newArgEntry;
- if (lateArgsComputed)
+ if (reMorphing)
{
// This is a register argument - possibly update it in the table
newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
}
else
{
- bool isNonStandard = false;
-
-#ifndef LEGACY_BACKEND
- // If there are nonstandard args (outside the calling convention) they were inserted above
- // and noted them in a table so we can recognize them here and build their argInfo.
- //
- // They should not affect the placement of any other args or stack space required.
- // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
- isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum);
-#endif // !LEGACY_BACKEND
+ if (isNonStandard)
+ {
+ nextRegNum = nonStdRegNum;
+ }
// This is a register argument - put it in the table
newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
@@ -4053,7 +4080,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// If the register arguments have not been determined then we must fill in the argInfo
- if (lateArgsComputed)
+ if (reMorphing)
{
// This is a stack argument - possibly update it in the table
call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
@@ -4068,14 +4095,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (copyBlkClass != NO_CLASS_HANDLE)
{
- noway_assert(!lateArgsComputed);
+ noway_assert(!reMorphing);
fgMakeOutgoingStructArgCopy(call, args, argIndex,
copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
// This can cause a GTF_EXCEPT flag to be set.
// TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
// This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
- // there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
+ // there are no register arguments. Then reMorphing is never true, so we keep re-copying
// any struct arguments.
// i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
@@ -4088,10 +4115,21 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
#ifndef LEGACY_BACKEND
if (argx->gtOper == GT_MKREFANY)
{
- NYI_X86("MKREFANY");
-
// 'Lower' the MKREFANY tree and insert it.
- noway_assert(!lateArgsComputed);
+ noway_assert(!reMorphing);
+
+#ifdef _TARGET_X86_
+
+ // Build the mkrefany as a GT_FIELD_LIST
+ GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST)
+ GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr);
+ (void)new (this, GT_FIELD_LIST)
+ GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList);
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
+ fp->node = fieldList;
+ args->gtOp.gtOp1 = fieldList;
+
+#else // !_TARGET_X86_
// Get a new temp
// Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
@@ -4117,9 +4155,47 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// EvalArgsToTemps will cause tmp to actually get loaded as the argument
call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
lvaSetVarAddrExposed(tmp);
+#endif // !_TARGET_X86_
}
#endif // !LEGACY_BACKEND
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (isStructArg)
+ {
+ GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx);
+ if ((lclNode != nullptr) &&
+ (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT))
+ {
+ // Make a GT_FIELD_LIST of the field lclVars.
+ GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon();
+ LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]);
+ GenTreeFieldList* fieldList = nullptr;
+ for (unsigned fieldLclNum = varDsc->lvFieldLclStart;
+ fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum];
+ if (fieldList == nullptr)
+ {
+ lcl->SetLclNum(fieldLclNum);
+ lcl->ChangeOper(GT_LCL_VAR);
+ lcl->gtType = fieldVarDsc->lvType;
+ fieldList = new (this, GT_FIELD_LIST)
+ GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr);
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
+ fp->node = fieldList;
+ args->gtOp.gtOp1 = fieldList;
+ }
+ else
+ {
+ GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType);
+ fieldList = new (this, GT_FIELD_LIST)
+ GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList);
+ }
+ }
+ }
+ }
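+    // For illustration (hypothetical type): an independently promoted  struct { int a; int b; }
+    // passed on the x86 stack is rewritten by the loop above into roughly
+    //   FIELD_LIST(LCL_VAR a, offset 0, TYP_INT) -> FIELD_LIST(LCL_VAR b, offset 4, TYP_INT)
+    // using each promoted field's lvFldOffset and type.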
+#endif // defined (_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (isStructArg && !isRegArg)
{
@@ -4132,7 +4208,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
} // end foreach argument loop
- if (!lateArgsComputed)
+ if (!reMorphing)
{
call->fgArgInfo->ArgsComplete();
#ifdef LEGACY_BACKEND
@@ -4240,11 +4316,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
// all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
// is added to make sure to call EvalArgsToTemp.
- if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
+ if (!reMorphing && (call->fgArgInfo->HasRegArgs()
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- || hasStackArgCopy
+ || hasStackArgCopy
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- ))
+ ))
{
// This is the first time that we morph this call AND it has register arguments.
// Follow into the code below and do the 'defer or eval to temp' analysis.
@@ -4271,7 +4347,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// In the future we can migrate UNIX_AMD64 to use this
// method instead of fgMorphSystemVStructArgs
- // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
+ // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend
if (hasMultiregStructArgs)
{
fgMorphMultiregStructArgs(call);
@@ -4334,7 +4410,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
{
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
if (argx == argNode)
@@ -4355,7 +4431,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
{
var_types originalType = type;
// If we have already processed the arg...
- if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg))
+ if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
{
continue;
}
@@ -4386,6 +4462,16 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
// Create LCL_FLD for each eightbyte.
argListCreated = true;
+ // First eightbyte.
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType =
+ GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ GenTreeFieldList* fieldList =
+ new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
+ fieldList->gtType = originalType; // Preserve the type. It is a special case.
+ arg = fieldList;
+
// Second eightbyte.
GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
@@ -4393,17 +4479,9 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
fgEntryPtr->structDesc.eightByteSizes[1]),
lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
- GenTreeArgList* aggregate = gtNewAggregate(newLclField);
- aggregate->gtType = originalType; // Preserve the type. It is a special case.
- newLclField->gtFieldSeq = FieldSeqStore::NotAField();
-
- // First field
- arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
- arg->gtType =
- GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
- fgEntryPtr->structDesc.eightByteSizes[0]);
- arg = aggregate->Prepend(this, arg);
- arg->gtType = type; // Preserve the type. It is a special case.
+ fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
+ fieldList->gtType = originalType; // Preserve the type. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
}
else
{
@@ -4450,7 +4528,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
{
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
if (argx == argNode)
@@ -4490,8 +4568,8 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen
//
// Notes:
// We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments.
-// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
-// whicj is only used for register arguments.
+// The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form
+// which is only used for struct arguments.
// If this method fails to find any TYP_STRUCT arguments it will assert.
//
void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
@@ -4540,7 +4618,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
{
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
- assert(list->IsList());
+ assert(list->OperIsList());
GenTreePtr argNode = list->Current();
if (argx == argNode)
@@ -4588,7 +4666,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
//-----------------------------------------------------------------------------
// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
-// Morph the argument into a set of GT_LIST nodes.
+// Morph the argument into a set of GT_FIELD_LIST nodes.
//
// Arguments:
// arg - A GenTree node containing a TYP_STRUCT arg that
@@ -4600,7 +4678,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
// for passing in multiple registers.
// If arg is a LclVar we check if it is struct promoted and has the right number of fields
// and if they are at the appropriate offsets we will use the struct promoted fields
-// in the GT_LIST nodes that we create.
+// in the GT_FIELD_LIST nodes that we create.
// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements
// we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
// this also forces the struct to be stack allocated into the local frame.
@@ -4715,7 +4793,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
// We should still have a TYP_STRUCT
assert(argValue->TypeGet() == TYP_STRUCT);
- GenTreeArgList* newArg = nullptr;
+ GenTreeFieldList* newArg = nullptr;
// Are we passing a struct LclVar?
//
@@ -4817,9 +4895,10 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
// Create a new tree for 'arg'
// replace the existing LDOBJ(ADDR(LCLVAR))
- // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
+ // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr))
//
- newArg = gtNewAggregate(hiLclVar)->Prepend(this, loLclVar);
+ newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr);
+ (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg);
}
}
}
@@ -4885,27 +4964,22 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
//
lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
- // Start building our list from the last element
- unsigned offset = lastOffset;
- unsigned inx = elemCount;
-
// Create a new tree for 'arg'
// replace the existing LDOBJ(ADDR(LCLVAR))
- // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...)
+ // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
//
- while (inx > 0)
+ unsigned offset = 0;
+ GenTreeFieldList* listEntry = nullptr;
+ for (unsigned inx = 0; inx < elemCount; inx++)
{
- inx--;
- offset -= elemSize;
+ elemSize = genTypeSize(type[inx]);
GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
+ listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry);
if (newArg == nullptr)
{
- newArg = gtNewAggregate(nextLclFld);
- }
- else
- {
- newArg = newArg->Prepend(this, nextLclFld);
+ newArg = listEntry;
}
+ offset += elemSize;
}
}
// Are we passing a GT_OBJ struct?
@@ -4918,17 +4992,14 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
// Create a new tree for 'arg'
// replace the existing LDOBJ(EXPR)
- // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...)
+ // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...)
//
- // Start building our list from the last element
- unsigned offset = structSize;
- unsigned inx = elemCount;
- while (inx > 0)
+ unsigned offset = 0;
+ GenTreeFieldList* listEntry = nullptr;
+ for (unsigned inx = 0; inx < elemCount; inx++)
{
- inx--;
- elemSize = genTypeSize(type[inx]);
- offset -= elemSize;
+ elemSize = genTypeSize(type[inx]);
GenTreePtr curAddr = baseAddr;
if (offset != 0)
{
@@ -4941,14 +5012,21 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
curAddr = baseAddr;
}
GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
- if (newArg == nullptr)
+
+            // For safety all GT_IND nodes should have at least GTF_GLOB_REF set.
+ curItem->gtFlags |= GTF_GLOB_REF;
+ if (fgAddrCouldBeNull(curItem))
{
- newArg = gtNewAggregate(curItem);
+ // This indirection can cause a GPF if the address could be null.
+ curItem->gtFlags |= GTF_EXCEPT;
}
- else
+
+ listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry);
+ if (newArg == nullptr)
{
- newArg = newArg->Prepend(this, curItem);
+ newArg = listEntry;
}
+ offset += elemSize;
}
}
}
@@ -5674,7 +5752,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
#if SMALL_TREE_NODES
- assert(tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL);
#endif
    // Change the original GT_INDEX node into a GT_IND node
@@ -5847,7 +5925,15 @@ GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varTyp
lclOffs));
// Access the argument through the local
- GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
+ GenTreePtr tree;
+ if (varType == TYP_STRUCT)
+ {
+ tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize);
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, varType, ptrArg);
+ }
tree->gtFlags |= GTF_IND_TGTANYWHERE;
if (varDsc->lvAddrExposed)
@@ -5884,8 +5970,14 @@ GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
if (info.compIsVarArgs)
{
GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
- if (newTree != NULL)
+ if (newTree != nullptr)
+ {
+ if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
+ {
+ fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
+ }
return newTree;
+ }
}
#endif // _TARGET_X86_
@@ -6205,7 +6297,9 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
+ {
baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
+ }
addr =
gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
@@ -6483,8 +6577,8 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
// hanging a "nothing" node to it. Later the "nothing" node will be removed
// and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
- noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call);
- fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+ noway_assert(fgMorphStmt->gtStmtExpr == call);
+ fgMorphStmt->gtStmtExpr = gtNewNothingNode();
}
// Clear the Inline Candidate flag so we can ensure later we tried
@@ -6662,7 +6756,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
{
nCalleeArgs++;
- assert(args->IsList());
+ assert(args->OperIsList());
GenTreePtr argx = args->gtOp.gtOp1;
if (varTypeIsStruct(argx))
@@ -6980,7 +7074,14 @@ void Compiler::fgMorphTailCall(GenTreeCall* call)
}
#endif // _TARGET_X86_
+#if defined(_TARGET_X86_)
+    // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail
+ // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's
+ // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub.
+ if (call->NeedsNullCheck() || call->IsVirtualStub())
+#else
if (call->NeedsNullCheck())
+#endif // defined(_TARGET_X86_)
{
// clone "this" if "this" has no side effects.
if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
@@ -7668,17 +7769,39 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
}
#endif
- GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr;
+ GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr;
#ifdef DEBUG
// Tail call needs to be in one of the following IR forms
// Either a call stmt or
- // GT_RETURN(GT_CALL(..)) or
- // var = call
- noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) ||
- (stmtExpr->gtOper == GT_RETURN &&
- (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) ||
- (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call));
+ // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
+ // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
+ genTreeOps stmtOper = stmtExpr->gtOper;
+ if (stmtOper == GT_CALL)
+ {
+ noway_assert(stmtExpr == call);
+ }
+ else
+ {
+ noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG);
+ GenTreePtr treeWithCall;
+ if (stmtOper == GT_RETURN)
+ {
+ treeWithCall = stmtExpr->gtGetOp1();
+ }
+ else
+ {
+ treeWithCall = stmtExpr->gtGetOp2();
+ }
+ if (treeWithCall->gtOper == GT_CAST)
+ {
+ noway_assert(treeWithCall->gtGetOp1() == call && !treeWithCall->gtOverflow());
+ }
+ else
+ {
+ noway_assert(treeWithCall == call);
+ }
+ }
#endif
// For void calls, we would have created a GT_CALL in the stmt list.
@@ -7687,7 +7810,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// For debuggable code, it would be an assignment of the call to a temp
    // We want to get rid of any of these extra trees, and just leave
// the call.
- GenTreePtr nextMorphStmt = fgMorphStmt->gtNext;
+ GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt;
#ifdef _TARGET_AMD64_
// Legacy Jit64 Compat:
@@ -7703,46 +7826,46 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
{
// First delete all GT_NOPs after the call
- GenTreePtr morphStmtToRemove = nullptr;
+ GenTreeStmt* morphStmtToRemove = nullptr;
while (nextMorphStmt != nullptr)
{
- GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
if (!nextStmtExpr->IsNothingNode())
{
break;
}
morphStmtToRemove = nextMorphStmt;
- nextMorphStmt = morphStmtToRemove->gtNext;
+ nextMorphStmt = morphStmtToRemove->gtNextStmt;
fgRemoveStmt(compCurBB, morphStmtToRemove);
}
// Check to see if there is a pop.
// Since tail call is honored, we can get rid of the stmt corresponding to pop.
- if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN)
+ if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN)
{
// Note that pop opcode may or may not result in a new stmt (for details see
// impImportBlockCode()). Hence, it is not possible to assert about the IR
// form generated by pop but pop tree must be side-effect free so that we can
// delete it safely.
- GenTreePtr popStmt = nextMorphStmt;
- nextMorphStmt = nextMorphStmt->gtNext;
+ GenTreeStmt* popStmt = nextMorphStmt;
+ nextMorphStmt = nextMorphStmt->gtNextStmt;
- noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ noway_assert((popStmt->gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
fgRemoveStmt(compCurBB, popStmt);
}
// Next delete any GT_NOP nodes after pop
while (nextMorphStmt != nullptr)
{
- GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr;
if (!nextStmtExpr->IsNothingNode())
{
break;
}
morphStmtToRemove = nextMorphStmt;
- nextMorphStmt = morphStmtToRemove->gtNext;
+ nextMorphStmt = morphStmtToRemove->gtNextStmt;
fgRemoveStmt(compCurBB, morphStmtToRemove);
}
}
@@ -7751,7 +7874,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// Delete GT_RETURN if any
if (nextMorphStmt != nullptr)
{
- GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ GenTreePtr retExpr = nextMorphStmt->gtStmtExpr;
noway_assert(retExpr->gtOper == GT_RETURN);
// If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
@@ -7766,7 +7889,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
fgRemoveStmt(compCurBB, nextMorphStmt);
}
- fgMorphStmt->gtStmt.gtStmtExpr = call;
+ fgMorphStmt->gtStmtExpr = call;
// Tail call via helper: The VM can't use return address hijacking if we're
// not going to return and the helper doesn't have enough info to safely poll,
@@ -7855,7 +7978,7 @@ NO_TAIL_CALL:
|| call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
#endif
) &&
- (call == fgMorphStmt->gtStmt.gtStmtExpr))
+ (call == fgMorphStmt->gtStmtExpr))
{
// This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
// Transform it into a null check.
@@ -8008,31 +8131,72 @@ NO_TAIL_CALL:
// This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
{
- GenTreePtr value = gtArgEntryByArgNum(call, 2)->node;
-
+ GenTree* value = gtArgEntryByArgNum(call, 2)->node;
if (value->IsIntegralConst(0))
{
assert(value->OperGet() == GT_CNS_INT);
- GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node;
- GenTreePtr index = gtArgEntryByArgNum(call, 1)->node;
- arr = gtClone(arr, true);
- if (arr != nullptr)
+ GenTree* arr = gtArgEntryByArgNum(call, 0)->node;
+ GenTree* index = gtArgEntryByArgNum(call, 1)->node;
+
+ // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
+ // the spill trees as well if necessary.
+ GenTreeOp* argSetup = nullptr;
+ for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest())
{
- index = gtClone(index, true);
- if (index != nullptr)
+ GenTree* const arg = earlyArgs->Current();
+ if (arg->OperGet() != GT_ASG)
{
- value = gtClone(value);
- noway_assert(value != nullptr);
+ continue;
+ }
+
+ assert(arg != arr);
+ assert(arg != index);
- GenTreePtr nullCheckedArr = impCheckForNullPointer(arr);
- GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
- GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value);
- arrStore->gtFlags |= GTF_ASG;
+ arg->gtFlags &= ~GTF_LATE_ARG;
- return fgMorphTree(arrStore);
+ GenTree* op1 = argSetup;
+ if (op1 == nullptr)
+ {
+ op1 = gtNewNothingNode();
+#if DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
}
+
+ argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg);
+
+#if DEBUG
+ argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
}
+
+#ifdef DEBUG
+ auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
+ (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+ return WALK_CONTINUE;
+ };
+
+ fgWalkTreePost(&arr, resetMorphedFlag);
+ fgWalkTreePost(&index, resetMorphedFlag);
+ fgWalkTreePost(&value, resetMorphedFlag);
+#endif // DEBUG
+
+ GenTree* const nullCheckedArr = impCheckForNullPointer(arr);
+ GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
+ GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value);
+ arrStore->gtFlags |= GTF_ASG;
+
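+            // Rough sketch of the result (operand names are illustrative): the helper call
+            //   CORINFO_HELP_ARRADDR_ST(arr, index, null)
+            // becomes approximately
+            //   COMMA(<spilled-arg assignments, if any>, ASG(INDEX(arr, index), null))
+            // i.e. a direct ref-array store; storing null never requires the covariance check.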
+ GenTree* result = fgMorphTree(arrStore);
+ if (argSetup != nullptr)
+ {
+ result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
+#if DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif // DEBUG
+ }
+
+ return result;
}
}
@@ -8187,8 +8351,14 @@ GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
{
GenTreePtr newTree =
fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
- if (newTree != NULL)
+ if (newTree != nullptr)
+ {
+ if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0))
+ {
+ fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType);
+ }
return newTree;
+ }
}
}
#endif // _TARGET_X86_
@@ -8390,7 +8560,7 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
// with the bits to create a single assigment.
noway_assert(size <= REGSIZE_BYTES);
- if (isInitBlock && (src->gtOper != GT_CNS_INT))
+ if (isInitBlock && !src->IsConstInitVal())
{
return nullptr;
}
@@ -8563,8 +8733,12 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
}
else
#endif
- if (src->IsCnsIntOrI())
{
+ if (src->OperIsInitVal())
+ {
+ src = src->gtGetOp1();
+ }
+ assert(src->IsCnsIntOrI());
// This will mutate the integer constant, in place, to be the correct
// value for the type we are using in the assignment.
src->AsIntCon()->FixupInitBlkValue(asgType);
@@ -8632,7 +8806,8 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
{
- noway_assert(tree->gtOper == GT_ASG && varTypeIsStruct(tree));
+ // We must have the GT_ASG form of InitBlkOp.
+ noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp());
#ifdef DEBUG
bool morphed = false;
#endif // DEBUG
@@ -8647,6 +8822,12 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
tree->gtOp.gtOp1 = dest;
}
tree->gtType = dest->TypeGet();
+ // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its
+ // type will be the type of the original lclVar, in which case we will change it to TYP_INT).
+ if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src))
+ {
+ src->gtType = TYP_INT;
+ }
JITDUMP("\nfgMorphInitBlock:");
GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
@@ -8658,7 +8839,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
else
{
GenTree* destAddr = nullptr;
- GenTree* initVal = src;
+ GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src;
GenTree* blockSize = nullptr;
unsigned blockWidth = 0;
FieldSeqNode* destFldSeq = nullptr;
@@ -8727,6 +8908,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
if (destLclVar->lvPromoted && blockWidthIsConst)
{
+ assert(initVal->OperGet() == GT_CNS_INT);
noway_assert(varTypeIsStruct(destLclVar));
noway_assert(!opts.MinOpts());
if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles)
@@ -8786,25 +8968,9 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
#if CPU_USES_BLOCK_MOVE
compBlkOpUsed = true;
#endif
- if (!dest->OperIsBlk())
- {
- GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(dest);
- if (clsHnd == NO_CLASS_HANDLE)
- {
- dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
- }
- else
- {
- GenTree* newDest = gtNewObjNode(clsHnd, destAddr);
- if (newDest->OperGet() == GT_OBJ)
- {
- gtSetObjGcInfo(newDest->AsObj());
- }
- dest = newDest;
- }
- tree->gtOp.gtOp1 = dest;
- }
+ dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true);
+ tree->gtOp.gtOp1 = dest;
+ tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
}
else
{
@@ -9068,9 +9234,18 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
{
unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
- blkNode->AsDynBlk()->gtDynamicSize = nullptr;
- blkNode->ChangeOper(GT_BLK);
- blkNode->gtBlkSize = size;
+ // A GT_BLK with size of zero is not supported,
+ // so if we encounter such a thing we just leave it as a GT_DYN_BLK
+ if (size != 0)
+ {
+ blkNode->AsDynBlk()->gtDynamicSize = nullptr;
+ blkNode->ChangeOper(GT_BLK);
+ blkNode->gtBlkSize = size;
+ }
+ else
+ {
+ return tree;
+ }
}
else
{
@@ -9104,7 +9279,7 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
//
// Notes:
// This does the following:
-// - Ensures that a struct operand is a block node.
+// - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar.
// - Ensures that any COMMAs are above ADDR nodes.
// Although 'tree' WAS an operand of a block assignment, the assignment
// may have been retyped to be a scalar assignment.
@@ -9113,10 +9288,6 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
{
GenTree* effectiveVal = tree->gtEffectiveVal();
- // TODO-1stClassStucts: We would like to transform non-TYP_STRUCT nodes to
- // either plain lclVars or GT_INDs. However, for now we want to preserve most
- // of the block nodes until the Rationalizer.
-
if (!varTypeIsStruct(asgType))
{
if (effectiveVal->OperIsIndir())
@@ -9143,69 +9314,141 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
}
else
{
+ GenTreeIndir* indirTree = nullptr;
+ GenTreeLclVarCommon* lclNode = nullptr;
+ bool needsIndirection = true;
+
+ if (effectiveVal->OperIsIndir())
+ {
+ indirTree = effectiveVal->AsIndir();
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ lclNode = addr->gtGetOp1()->AsLclVarCommon();
+ }
+ }
+ else if (effectiveVal->OperGet() == GT_LCL_VAR)
+ {
+ lclNode = effectiveVal->AsLclVarCommon();
+ }
#ifdef FEATURE_SIMD
if (varTypeIsSIMD(asgType))
{
- if (effectiveVal->OperIsIndir())
+ if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) &&
+ (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD))
{
- GenTree* addr = effectiveVal->AsIndir()->Addr();
- if (!isDest && (addr->OperGet() == GT_ADDR))
- {
- if ((addr->gtGetOp1()->gtOper == GT_SIMD) || (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
- {
- effectiveVal = addr->gtGetOp1();
- }
- }
- else if (isDest && !effectiveVal->OperIsBlk())
- {
- effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
- }
+ assert(!isDest);
+ needsIndirection = false;
+ effectiveVal = indirTree->Addr()->gtGetOp1();
}
- else if (!effectiveVal->OperIsSIMD() && (!effectiveVal->IsLocal() || isDest) && !effectiveVal->OperIsBlk())
+ if (effectiveVal->OperIsSIMD())
{
- GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
- effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ needsIndirection = false;
}
}
- else
#endif // FEATURE_SIMD
- if (!effectiveVal->OperIsBlk())
+ if (lclNode != nullptr)
+ {
+ LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]);
+ if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth))
+ {
+#ifndef LEGACY_BACKEND
+ effectiveVal = lclNode;
+ needsIndirection = false;
+#endif // !LEGACY_BACKEND
+ }
+ else
+ {
+ // This may be a lclVar that was determined to be address-exposed.
+ effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT);
+ }
+ }
+ if (needsIndirection)
{
- GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
- GenTree* newTree;
- if (clsHnd == NO_CLASS_HANDLE)
+ if (indirTree != nullptr)
{
- newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
+                    // A non-block struct indirection should never appear on the lhs of an assignment.
+ assert(!isDest || indirTree->OperIsBlk());
+ if (!isDest && indirTree->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
}
else
{
- newTree = gtNewObjNode(clsHnd, addr);
- if (isDest && (newTree->OperGet() == GT_OBJ))
+ GenTree* newTree;
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ if (isDest)
{
- gtSetObjGcInfo(newTree->AsObj());
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
+ }
+ else
+ {
+ newTree = gtNewObjNode(clsHnd, addr);
+ if (isDest && (newTree->OperGet() == GT_OBJ))
+ {
+ gtSetObjGcInfo(newTree->AsObj());
+ }
+ if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
+ {
+ // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
+ // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
+ // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
+ // separately now to avoid excess diffs.
+ newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
+ }
+ }
}
- if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
+ else
{
- // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
- // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
- // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
- // separately now to avoid excess diffs.
- newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
+ newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr);
}
+ effectiveVal = newTree;
}
- effectiveVal = newTree;
}
}
- if (!isDest && effectiveVal->OperIsBlk())
- {
- (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
- }
tree = effectiveVal;
return tree;
}
//------------------------------------------------------------------------
+// fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk
+//
+// Arguments:
+// dest - the GT_OBJ or GT_STORE_OBJ
+//
+// Assumptions:
+// The destination must be known (by the caller) to be on the stack.
+//
+// Notes:
+//    If we have a CopyObj with a dest on the stack, and its size is small enough
+// to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a
+// GC Unsafe CopyBlk that is non-interruptible.
+// This is not supported for the JIT32_GCENCODER, in which case this method is a no-op.
+//
+void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest)
+{
+#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
+ assert(dest->gtGcPtrCount != 0);
+ unsigned blockWidth = dest->AsBlk()->gtBlkSize;
+#ifdef DEBUG
+ bool destOnStack = false;
+ GenTree* destAddr = dest->Addr();
+ assert(destAddr->IsLocalAddrExpr() != nullptr);
+#endif
+ if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT))
+ {
+ genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK;
+ dest->SetOper(newOper);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ }
+#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER)
+}
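+
+// For illustration (sizes are an example only): copying a hypothetical 24-byte struct containing GC
+// refs into a stack-allocated local would have its GT_OBJ destination retyped to GT_BLK with
+// gtBlkOpGcUnsafe set, so the unrolled copy is emitted as one non-interruptible sequence (except
+// under the JIT32_GCENCODER, where this method does nothing).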
+
+//------------------------------------------------------------------------
// fgMorphCopyBlock: Perform the Morphing of block copy
//
// Arguments:
@@ -9444,6 +9687,14 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
bool requiresCopyBlock = false;
bool srcSingleLclVarAsg = false;
+ if ((destLclVar != nullptr) && (srcLclVar == destLclVar))
+ {
+        // A copy of a struct to itself is useless, and beyond the wasted work it is not prudent to keep it,
+        // so replace it with a nothing node.
+ GenTree* nop = gtNewNothingNode();
+ INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+ return nop;
+ }
+
// If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
{
@@ -9485,12 +9736,19 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// Are both dest and src promoted structs?
if (destDoFldAsg && srcDoFldAsg)
{
- // Both structs should be of the same type, if not we will use a copy block
+ // Both structs should be of the same type, or each have a single field of the same type.
+ // If not we will use a copy block.
if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
{
- requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
- JITDUMP(" with mismatched types");
+ unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart;
+ unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart;
+ if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) ||
+ (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType))
+ {
+ requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
+ JITDUMP(" with mismatched types");
+ }
}
}
// Are neither dest or src promoted structs?
@@ -9584,34 +9842,24 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
var_types asgType = dest->TypeGet();
dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
asg->gtOp.gtOp1 = dest;
- hasGCPtrs = ((dest->OperGet() == GT_OBJ) && (dest->AsObj()->gtGcPtrCount != 0));
+ asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
-#ifdef CPBLK_UNROLL_LIMIT
// Note that the unrolling of CopyBlk is only implemented on some platforms.
- // Currently that includes x64 and Arm64 but not x64 or Arm32.
+ // Currently that includes x64 and ARM but not x86: the code generation for this
+ // construct requires the ability to mark certain regions of the generated code
+ // as non-interruptible, and the GC encoding for the latter platform does not
+ // have this capability.
// If we have a CopyObj with a dest on the stack
    // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
- // when its size is small enouch to be completely unrolled (i.e. between [16..64] bytes)
+    // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes).
+ // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.)
//
- if (hasGCPtrs && destOnStack && blockWidthIsConst && (blockWidth >= (2 * TARGET_POINTER_SIZE)) &&
- (blockWidth <= CPBLK_UNROLL_LIMIT))
+ if (destOnStack && (dest->OperGet() == GT_OBJ))
{
- if (dest->OperGet() == GT_OBJ)
- {
- dest->SetOper(GT_BLK);
- dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
- }
- else
- {
- assert(dest->OperIsLocal());
- GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
- dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
- dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
- tree->gtOp.gtOp1 = dest;
- }
+ fgMorphUnsafeBlk(dest->AsObj());
}
-#endif
+
// Eliminate the "OBJ or BLK" node on the rhs.
rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
asg->gtOp.gtOp2 = rhs;
@@ -9659,8 +9907,6 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// To do fieldwise assignments for both sides, they'd better be the same struct type!
// All of these conditions were checked above...
assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
- assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() ==
- lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
fieldCnt = destLclVar->lvFieldCnt;
@@ -10354,23 +10600,12 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
/* fgDoNormalizeOnStore can change op2 */
noway_assert(op1 == tree->gtOp.gtOp1);
op2 = tree->gtOp.gtOp2;
- // TODO-1stClassStructs: this is here to match previous behavior, but results in some
- // unnecessary pessimization in the handling of addresses in fgMorphCopyBlock().
- if (tree->OperIsBlkOp())
- {
- op1->gtFlags |= GTF_DONT_CSE;
- if (tree->OperIsCopyBlkOp() &&
- (op2->IsLocal() || (op2->OperIsIndir() && (op2->AsIndir()->Addr()->OperGet() == GT_ADDR))))
- {
- op2->gtFlags |= GTF_DONT_CSE;
- }
- }
#ifdef FEATURE_SIMD
{
// We should check whether op2 should be assigned to a SIMD field or not.
                // If it is, we should translate the tree to a SIMD intrinsic.
- assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0);
+ assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0));
GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
typ = tree->TypeGet();
op1 = tree->gtGetOp1();
@@ -10451,8 +10686,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
case GT_COLON:
#if LOCAL_ASSERTION_PROP
if (optLocalAssertionProp)
- {
#endif
+ {
isQmarkColon = true;
}
break;
@@ -10608,13 +10843,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
{
op2 = gtFoldExprConst(op2);
}
-
- if (fgShouldUseMagicNumberDivide(tree->AsOp()))
- {
- tree = fgMorphDivByConst(tree->AsOp());
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
- }
#endif // !LEGACY_BACKEND
break;
@@ -10673,44 +10901,44 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
//
#else // _TARGET_XARCH
- /* If this is an unsigned long mod with op2 which is a cast to long from a
- constant int, then don't morph to a call to the helper. This can be done
- faster inline using idiv.
- */
+ /* If this is an unsigned long mod with op2 which is a cast to long from a
+ constant int, then don't morph to a call to the helper. This can be done
+ faster inline using idiv.
+ */
- noway_assert(op2);
- if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
- ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
- ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
- {
- if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
- op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
- op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
- (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
- {
- tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
- noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
- }
+ noway_assert(op2);
+ if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
+ {
+ if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
+ (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
+ {
+ tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
+ noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
+ }
- if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
- op2->gtIntConCommon.LngValue() <= 0x3fffffff)
- {
- tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
- noway_assert(op1->TypeGet() == TYP_LONG);
+ if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
+ op2->gtIntConCommon.LngValue() <= 0x3fffffff)
+ {
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+ noway_assert(op1->TypeGet() == TYP_LONG);
- // Update flags for op1 morph
- tree->gtFlags &= ~GTF_ALL_EFFECT;
+ // Update flags for op1 morph
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
- tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
- // If op1 is a constant, then do constant folding of the division operator
- if (op1->gtOper == GT_CNS_NATIVELONG)
- {
- tree = gtFoldExpr(tree);
+ // If op1 is a constant, then do constant folding of the division operator
+ if (op1->gtOper == GT_CNS_NATIVELONG)
+ {
+ tree = gtFoldExpr(tree);
+ }
+ return tree;
}
- return tree;
}
- }
#endif // _TARGET_XARCH
ASSIGN_HELPER_FOR_MOD:
@@ -10773,16 +11001,28 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
-
-#else // !_TARGET_ARM64_
-
- if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp()))
- {
- tree = fgMorphModByConst(tree->AsOp());
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
+#else // !_TARGET_ARM64_
+ // If b is not a power of 2 constant then lowering replaces a % b
+ // with a - (a / b) * b and applies magic division optimization to
+ // a / b. The code may already contain an a / b expression (e.g.
+ // x = a / 10; y = a % 10;) and then we end up with redundant code.
+ // If we convert % to / here we give CSE the opportunity to eliminate
+ // the redundant division. If there's no redundant division then
+ // nothing is lost, lowering would have done this transform anyway.
+
+ if ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())
+ {
+ ssize_t divisorValue = op2->AsIntCon()->IconValue();
+ size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
+ : static_cast<size_t>(abs(divisorValue));
+
+ if (!isPow2(absDivisorValue))
+ {
+ tree = fgMorphModToSubMulDiv(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
}
-
#endif //_TARGET_ARM64_
#endif // !LEGACY_BACKEND
break;
@@ -10857,12 +11097,12 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
(op2->gtCall.gtCallType == CT_HELPER)))
#else
- if ((((op1->gtOper == GT_INTRINSIC) &&
- (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
- ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
- (((op2->gtOper == GT_INTRINSIC) &&
- (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
- ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
+ if ((((op1->gtOper == GT_INTRINSIC) &&
+ (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
+ (((op2->gtOper == GT_INTRINSIC) &&
+ (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
#endif
{
GenTreePtr pGetClassFromHandle;
@@ -10872,8 +11112,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
#else
- bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
- bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+ bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
+ bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
#endif
// Optimize typeof(...) == typeof(...)
@@ -10929,8 +11169,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
CORINFO_INTRINSIC_Object_GetType &&
#else
- if ((pGetType->gtOper == GT_INTRINSIC) &&
- (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
+ if ((pGetType->gtOper == GT_INTRINSIC) &&
+ (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
#endif
pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
{
@@ -10944,7 +11184,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
#ifdef LEGACY_BACKEND
GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
#else
- GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
#endif
objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
compCurBB->bbFlags |= BBF_HAS_VTABREF;
@@ -11041,7 +11281,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// Assume it's an Ind context to start.
MorphAddrContext subIndMac1(MACK_Ind);
MorphAddrContext* subMac1 = mac;
- if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock)
+ if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind)
{
switch (tree->gtOper)
{
@@ -11532,7 +11772,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// EQ/NE
// / \
- // op1 CNS 0/1
+ // op1 CNS 0/1
//
ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
@@ -11557,11 +11797,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// EQ/NE Possible REVERSE(RELOP)
// / \ / \
- // COMMA CNS 0/1 -> COMMA relop_op2
+ // COMMA CNS 0/1 -> COMMA relop_op2
// / \ / \
- // x RELOP x relop_op1
+ // x RELOP x relop_op1
// / \
- // relop_op1 relop_op2
+ // relop_op1 relop_op2
//
//
//
@@ -11600,13 +11840,13 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// EQ/NE EQ/NE
// / \ / \
- // COMMA CNS 0/1 -> RELOP CNS 0/1
+ // COMMA CNS 0/1 -> RELOP CNS 0/1
// / \ / \
- // ASG LCL_VAR
+ // ASG LCL_VAR
// / \
- // LCL_VAR RELOP
+ // LCL_VAR RELOP
// / \
- //
+ //
GenTreePtr asg = op1->gtOp.gtOp1;
GenTreePtr lcl = op1->gtOp.gtOp2;
@@ -11689,9 +11929,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// EQ/NE -> RELOP/!RELOP
// / \ / \
- // RELOP CNS 0/1
+ // RELOP CNS 0/1
// / \
- //
+ //
// Note that we will remove/destroy the EQ/NE node and move
// the RELOP up into it's location.
@@ -11721,11 +11961,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// EQ/NE EQ/NE
// / \ / \
- // AND CNS 0/1 -> AND CNS 0
+ // AND CNS 0/1 -> AND CNS 0
// / \ / \
- // RSZ/RSH CNS 1 x CNS (1 << y)
+ // RSZ/RSH CNS 1 x CNS (1 << y)
// / \
- // x CNS_INT +y
+ // x CNS_INT +y
if (op1->gtOper == GT_AND)
{
@@ -12121,38 +12361,42 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
goto CM_OVF_OP;
}
- /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
-
- noway_assert(op2);
- if (op2->IsCnsIntOrI())
+ // TODO #4104: there are a lot of other places where
+ // this condition is not checked before transformations.
+ if (fgGlobalMorph)
{
- /* Negate the constant and change the node to be "+" */
+ /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
- op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
- oper = GT_ADD;
- tree->ChangeOper(oper);
- goto CM_ADD_OP;
- }
+ noway_assert(op2);
+ if (op2->IsCnsIntOrI())
+ {
+ /* Negate the constant and change the node to be "+" */
- /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
+ op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
- noway_assert(op1);
- if (op1->IsCnsIntOrI())
- {
- noway_assert(varTypeIsIntOrI(tree));
+ /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
- tree->gtOp.gtOp2 = op2 =
- gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same
- // as the type of the tree, i.e. tree->gtType.
- fgMorphTreeDone(op2);
+ noway_assert(op1);
+ if (op1->IsCnsIntOrI())
+ {
+ noway_assert(varTypeIsIntOrI(tree));
- oper = GT_ADD;
- tree->ChangeOper(oper);
- goto CM_ADD_OP;
- }
+ tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG
+ // node should be the same
+ // as the type of the tree, i.e. tree->gtType.
+ fgMorphTreeDone(op2);
- /* No match - exit */
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+ /* No match - exit */
+ }
break;
#ifdef _TARGET_ARM64_
@@ -12281,7 +12525,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// Dereferencing the pointer in either case will have the
// same effect.
- if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet()))
+ if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) &&
+ ((op1->gtFlags & GTF_ALL_EFFECT) == 0))
{
op2->gtType = tree->gtType;
DEBUG_DESTROY_NODE(op1);
@@ -12520,7 +12765,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset
// is zero
- if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0))
+ if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
{
// We can just use the existing promoted field LclNum
temp->gtLclVarCommon.SetLclNum(lclNumFld);
@@ -12538,8 +12783,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
!lvaTable[lclNum].lvNormalizeOnLoad())
{
- tree->gtType = temp->gtType;
- foldAndReturnTemp = true;
+ tree->gtType = typ = temp->TypeGet();
+ foldAndReturnTemp = true;
}
else
{
@@ -12554,7 +12799,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// Append the field sequence, change the type.
temp->AsLclFld()->gtFieldSeq =
GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
- temp->gtType = tree->TypeGet();
+ temp->gtType = typ;
foldAndReturnTemp = true;
}
@@ -12623,9 +12868,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
#ifdef _TARGET_ARM_
// Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
//
- if (varTypeIsFloating(tree->TypeGet()))
+ if (varTypeIsFloating(typ))
{
- if ((ival1 % emitTypeSize(tree->TypeGet())) != 0)
+ if ((ival1 % emitTypeSize(typ)) != 0)
{
tree->gtFlags |= GTF_IND_UNALIGNED;
break;
@@ -12638,24 +12883,35 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
}
-#ifdef DEBUG
- // If we have decided to fold, then temp cannot be nullptr
- if (foldAndReturnTemp)
- {
- assert(temp != nullptr);
- }
-#endif
-
- if (temp != nullptr)
- {
- noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
-
- // If we haven't already decided to fold this expression
- //
- if (!foldAndReturnTemp)
+ // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging:
+ // - We may have a load of a local where the load has a different type than the local
+ // - We may have a load of a local plus an offset
+ //
+ // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and
+ // offset if doing so is legal. The only cases in which this transformation is illegal are if the load
+ // begins before the local or if the load extends beyond the end of the local (i.e. if the load is
+ // out-of-bounds w.r.t. the local).
+ if ((temp != nullptr) && !foldAndReturnTemp)
+ {
+ assert(temp->OperIsLocal());
+
+ const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* const varDsc = &lvaTable[lclNum];
+
+ const var_types tempTyp = temp->TypeGet();
+ const bool useExactSize =
+ varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK);
+ const unsigned varSize = useExactSize ? varDsc->lvExactSize : genTypeSize(temp);
+
+ // If the size of the load is greater than the size of the lclVar, we cannot fold this access into
+ // a lclFld: the access represented by an lclFld node must begin at or after the start of the
+ // lclVar and must not extend beyond the end of the lclVar.
+ if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize))
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ }
+ else
{
- noway_assert(temp->OperIsLocal());
- LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]);
// Make sure we don't separately promote the fields of this struct.
if (varDsc->lvRegStruct)
{
@@ -12664,7 +12920,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
else
{
- lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
}
// We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival'
@@ -12689,19 +12945,19 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
temp->gtType = tree->gtType;
foldAndReturnTemp = true;
}
+ }
- assert(foldAndReturnTemp == true);
+ if (foldAndReturnTemp)
+ {
+ assert(temp != nullptr);
+ assert(temp->TypeGet() == typ);
+ assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR));
- // Keep the DONT_CSE flag in sync
- // (i.e keep the original value of this flag from tree)
- // as it can be set for 'temp' because a GT_ADDR always marks it for it's op1
- //
+ // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
+ // 'temp' because a GT_ADDR always marks it for its operand.
temp->gtFlags &= ~GTF_DONT_CSE;
temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
- noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
- noway_assert(temp->gtType == tree->gtType);
-
if (op1->OperGet() == GT_ADD)
{
DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
@@ -12984,7 +13240,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// If we are in the Valuenum CSE phase then don't morph away anything as these
// nodes may have CSE defs/uses in them.
//
- if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList())
+ if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList())
{
/* Check for op1 as a GT_COMMA with a unconditional throw node */
if (op1 && fgIsCommaThrow(op1, true))
@@ -13530,6 +13786,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
/* The target is used as well as being defined */
if (op1->OperIsLocal())
{
+ op1->gtFlags &= ~GTF_VAR_USEDEF;
op1->gtFlags |= GTF_VAR_USEASG;
}
@@ -13666,7 +13923,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
/* Check for the case "(val + icon) << icon" */
- if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
+ if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
{
GenTreePtr cns = op1->gtOp.gtOp2;
@@ -13731,192 +13988,45 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
break;
+ case GT_INIT_VAL:
+ // Initialization values for initBlk have special semantics - their lower
+ // byte is used to fill the struct. However, we allow 0 as a "bare" value,
+ // which enables them to get a VNForZero, and be propagated.
+ if (op1->IsIntegralConst(0))
+ {
+ return op1;
+ }
+ break;
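+                // Example of the semantics (value chosen arbitrarily): GT_INIT_VAL(icon 0x42) fills every
+                // byte of the destination with 0x42, whereas a bare 0 is allowed because a zero fill
+                // pattern is identical to the integer constant 0.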
+
default:
break;
}
return tree;
}
-// code to generate a magic number and shift amount for the magic number division
-// optimization. This code is previously from UTC where it notes it was taken from
-// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
-// The paper it is based on is "Division by invariant integers using multiplication"
-// by Torbjorn Granlund and Peter L. Montgomery in PLDI 94
-
-template <typename T>
-T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/)
-{
- // static SMAG smag;
- const int bits = sizeof(T) * 8;
- const int bits_minus_1 = bits - 1;
-
- typedef typename jitstd::make_unsigned<T>::type UT;
-
- const UT two_nminus1 = UT(1) << bits_minus_1;
-
- int p;
- UT absDenom;
- UT absNc;
- UT delta;
- UT q1;
- UT r1;
- UT r2;
- UT q2;
- UT t;
- T result_magic;
- int result_shift;
- int iters = 0;
-
- absDenom = abs(denom);
- t = two_nminus1 + ((unsigned int)denom >> 31);
- absNc = t - 1 - (t % absDenom); // absolute value of nc
- p = bits_minus_1; // initialize p
- q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
- r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
- q2 = two_nminus1 / absDenom; // initialize q1 = 2^p / abs(denom)
- r2 = two_nminus1 - (q2 * absDenom); // initialize r1 = rem(2^p, abs(denom))
-
- do
- {
- iters++;
- p++;
- q1 *= 2; // update q1 = 2^p / abs(nc)
- r1 *= 2; // update r1 = rem(2^p / abs(nc))
-
- if (r1 >= absNc)
- { // must be unsigned comparison
- q1++;
- r1 -= absNc;
- }
-
- q2 *= 2; // update q2 = 2^p / abs(denom)
- r2 *= 2; // update r2 = rem(2^p / abs(denom))
-
- if (r2 >= absDenom)
- { // must be unsigned comparison
- q2++;
- r2 -= absDenom;
- }
-
- delta = absDenom - r2;
- } while (q1 < delta || (q1 == delta && r1 == 0));
-
- result_magic = q2 + 1; // resulting magic number
- if (denom < 0)
- {
- result_magic = -result_magic;
- }
- *shift = p - bits; // resulting shift
-
- return result_magic;
-}
-
-bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree)
-{
-#ifdef _TARGET_ARM64_
- // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64
- return false;
-#else
-
- // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do,
- // if this method returns true we will introduce a new LclVar and
- // a couple of new GenTree nodes, including an assignment to the new LclVar.
- // None of these new GenTree nodes will have valid ValueNumbers.
- // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase.
- //
- // Also during optAssertionProp when extracting side effects we can assert
- // during gtBuildCommaList if we have one tree that has Value Numbers
- // and another one that does not.
- //
- if (!fgGlobalMorph)
- {
- // We only perform the Magic Number Divide optimization during
- // the initial global morph phase
- return false;
- }
-
- if (tree->gtFlags & GTF_OVERFLOW)
- {
- return false;
- }
-
- if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG)
- {
- return false;
- }
-
- ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue();
-
- if (cons == 0 || cons == -1 || cons == 1)
- {
- return false;
- }
-
- // codegen will expand these
- if (cons == SSIZE_T_MIN || isPow2(abs(cons)))
- {
- return false;
- }
-
- // someone else will fold this away, so don't make it complicated for them
- if (tree->gtOp1->IsCnsIntOrI())
- {
- return false;
- }
-
- // There is no technical barrier to handling unsigned, however it is quite rare
- // and more work to support and test
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- return false;
- }
-
- return true;
-#endif
-}
-
-// transform x%c -> x-((x/c)*c)
-
-GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree)
-{
- assert(fgShouldUseMagicNumberDivide(tree));
-
- var_types type = tree->gtType;
-
- GenTree* cns = tree->gtOp2;
-
- GenTree* numerator = fgMakeMultiUse(&tree->gtOp1);
-
- tree->SetOper(GT_DIV);
-
- GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns));
-
- GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul);
-
-#ifdef DEBUG
- sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
-#endif
-
- return sub;
-}
-
-// For ARM64 we don't have a remainder instruction,
-// The architecture manual suggests the following transformation to
-// generate code for such operator:
+//------------------------------------------------------------------------
+// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
+// (see ECMA III 3.55 and III.3.56).
//
-// a % b = a - (a / b) * b;
+// Arguments:
+// tree - The GT_MOD/GT_UMOD tree to morph
//
-// This method will produce the above expression in 'a' and 'b' are
-// leaf nodes, otherwise, if any of them is not a leaf it will spill
-// its value into a temporary variable, an example:
-// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
+// Returns:
+// The morphed tree
+//
+// Notes:
+//    For ARM64 we don't have a remainder instruction, so this transform is
+//    always done. For XARCH this transform is done if we know that magic
+//    division will be used; in that case this transform allows CSE to
+//    eliminate the redundant div from code like "x = a / 3; y = a % 3;".
+//
+//    This method will produce the above expression if 'a' and 'b' are
+//    leaf nodes; otherwise, if either of them is not a leaf, it will spill
+//    its value into a temporary variable. For example:
+//    (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
//
GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
{
-#ifndef _TARGET_ARM64_
- assert(!"This should only be called for ARM64");
-#endif
-
if (tree->OperGet() == GT_MOD)
{
tree->SetOper(GT_DIV);
@@ -13944,8 +14054,16 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
denominator = fgMakeMultiUse(&tree->gtOp2);
}
+ // The numerator and denominator may have been assigned to temps, in which case
+ // their defining assignments are in the current tree. Therefore, we need to
+    // set the execution order accordingly on the nodes we create.
+ // That is, the "mul" will be evaluated in "normal" order, and the "sub" must
+ // be set to be evaluated in reverse order.
+ //
GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
+ assert(!mul->IsReverseOp());
GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
+ sub->gtFlags |= GTF_REVERSE_OPS;
#ifdef DEBUG
sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
@@ -13954,95 +14072,6 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
return sub;
}
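As a sanity check on the identity used by fgMorphModToSubMulDiv, a small standalone sketch (assuming C++'s truncating signed division, which matches the ECMA semantics cited above; the sample values are arbitrary):

    #include <cassert>

    // a % b == a - (a / b) * b under truncating signed division.
    static int ModViaSubMulDiv(int a, int b)
    {
        return a - (a / b) * b;
    }

    int main()
    {
        int numerators[] = {7, -7, 10, -10, 3, 0};
        int divisors[]   = {3, -3, 5, -5, 1};
        for (int a : numerators)
        {
            for (int b : divisors)
            {
                assert(ModViaSubMulDiv(a, b) == a % b);
            }
        }
        return 0;
    }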
-// Turn a division by a constant into a multiplication by constant + some adjustments
-// see comments on GetSignedMagicNumberForDivide for source of this algorithm.
-// returns: the transformed tree
-
-GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
-{
- assert(fgShouldUseMagicNumberDivide(tree));
-
- JITDUMP("doing magic number divide optimization\n");
-
- int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue();
- int64_t magic;
- int shift;
- var_types type = tree->gtType;
-
- if (tree->gtType == TYP_INT)
- {
- magic = GetSignedMagicNumberForDivide<int32_t>((int32_t)denominator, &shift);
- }
- else
- {
- magic = GetSignedMagicNumberForDivide<int64_t>((int64_t)denominator, &shift);
- }
-
- GenTree* numerator = nullptr;
-
- // If signs of the denominator and magic number don't match,
- // we will need to use the numerator again.
- if (signum(denominator) != signum(magic))
- {
- numerator = fgMakeMultiUse(&tree->gtOp1);
- tree->gtFlags |= GTF_ASG;
- }
-
- if (type == TYP_LONG)
- {
- tree->gtOp2->gtIntConCommon.SetLngValue(magic);
- }
- else
- {
- tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);
- }
-
- tree->SetOper(GT_MULHI);
-
- GenTree* t = tree;
- GenTree* mulresult = tree;
-
- JITDUMP("Multiply Result:\n");
- DISPTREE(mulresult);
-
- GenTree* adjusted = mulresult;
-
- if (denominator > 0 && magic < 0)
- {
- // add the numerator back in
- adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
- }
- else if (denominator < 0 && magic > 0)
- {
- // subtract the numerator off
- adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
- }
- else
- {
- adjusted = mulresult;
- }
-
- GenTree* result1 = adjusted;
- if (shift != 0)
- {
- result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
- }
-
- GenTree* secondClone = fgMakeMultiUse(&result1);
-
- GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));
-
- GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
- JITDUMP("Final Magic Number divide:\n");
- DISPTREE(result);
-
-#ifdef DEBUG
- result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
-#endif
-
- return result;
-}
-
//------------------------------------------------------------------------------
// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
//
@@ -14238,10 +14267,10 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
#ifndef _TARGET_64BIT_
if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
{
- // TODO: we need to handle variable-sized long shifts specially on x86.
+ // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
// GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
// to add helpers for GT_ROL and GT_ROR.
- NYI("Rotation of a long value by variable amount");
+ return tree;
}
#endif
@@ -14276,7 +14305,15 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
tree->gtOp.gtOp1 = rotatedValue;
tree->gtOp.gtOp2 = rotateIndex;
tree->ChangeOper(rotateOp);
- noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT));
+
+ unsigned childFlags = 0;
+ for (GenTree* op : tree->Operands())
+ {
+ childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
+ }
+
+ // The parent's flags should be a superset of its operands' flags
+ noway_assert((inputTreeEffects & childFlags) == childFlags);
}
else
{
@@ -14719,29 +14756,15 @@ DONE:
}
#if LOCAL_ASSERTION_PROP
-/*****************************************************************************
- *
- * Kill all dependent assertions with regard to lclNum.
- *
- */
-
-void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
+//------------------------------------------------------------------------
+// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
+//
+// Arguments:
+// lclNum - The varNum of the lclVar for which we're killing assertions.
+// tree - (DEBUG only) the tree responsible for killing its assertions.
+//
+void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree))
{
- LclVarDsc* varDsc = &lvaTable[lclNum];
-
- if (varDsc->lvPromoted)
- {
- noway_assert(varTypeIsStruct(varDsc));
-
- // Kill the field locals.
- for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
- {
- fgKillDependentAssertions(i DEBUGARG(tree));
- }
-
- // Fall through to kill the struct local itself.
- }
-
/* All dependent assertions are killed here */
ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
@@ -14778,6 +14801,48 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tre
noway_assert(BitVecOps::IsEmpty(apTraits, killed));
}
}
+//------------------------------------------------------------------------
+// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
+//
+// Arguments:
+// lclNum - The varNum of the lclVar for which we're killing assertions.
+// tree - (DEBUG only) the tree responsible for killing its assertions.
+//
+// Notes:
+// For structs and struct fields, it will invalidate the children and parent
+// respectively.
+// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
+//
+void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // Kill the field locals.
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ fgKillDependentAssertionsSingle(i DEBUGARG(tree));
+ }
+
+ // Kill the struct local itself.
+ fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
+ }
+ else if (varDsc->lvIsStructField)
+ {
+ // Kill the field local.
+ fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
+
+ // Kill the parent struct.
+ fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
+ }
+ else
+ {
+ fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
+ }
+}
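To make the promoted-struct handling above concrete, a toy model of the dispatch (hypothetical names, not JIT code): a def of a promoted struct kills assertions on each field local and on the struct itself, while a def of a field local also kills assertions on its parent struct.

    #include <vector>

    // Toy stand-in for LclVarDsc, just enough to model the dispatch above.
    struct ToyLclDesc
    {
        bool     promoted      = false; // struct local promoted into field locals
        bool     isStructField = false; // field local of a promoted struct
        unsigned fieldStart    = 0;
        unsigned fieldCnt      = 0;
        unsigned parentLcl     = 0;
    };

    static std::vector<unsigned> LocalsToKill(const std::vector<ToyLclDesc>& table, unsigned lclNum)
    {
        std::vector<unsigned> killed;
        const ToyLclDesc& dsc = table[lclNum];
        if (dsc.promoted)
        {
            for (unsigned i = dsc.fieldStart; i < dsc.fieldStart + dsc.fieldCnt; ++i)
            {
                killed.push_back(i); // the field locals
            }
            killed.push_back(lclNum); // the struct local itself
        }
        else if (dsc.isStructField)
        {
            killed.push_back(lclNum);        // the field local
            killed.push_back(dsc.parentLcl); // its parent struct
        }
        else
        {
            killed.push_back(lclNum); // an ordinary local
        }
        return killed;
    }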
#endif // LOCAL_ASSERTION_PROP
/*****************************************************************************
@@ -14841,13 +14906,12 @@ void Compiler::fgMorphTreeDone(GenTreePtr tree,
if (optAssertionCount > 0)
{
/* Is this an assignment to a local variable */
-
- if ((tree->OperKind() & GTK_ASGOP) &&
- (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ if (tree->DefinesLocal(this, &lclVarTree))
{
- unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
- noway_assert(op1LclNum < lvaCount);
- fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
+ unsigned lclNum = lclVarTree->gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ fgKillDependentAssertions(lclNum DEBUGARG(tree));
}
}
@@ -15223,14 +15287,15 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
// Returns false if 'stmt' is still in the block (even if other statements were removed).
//
-bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg))
+bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg))
{
- noway_assert(stmt->gtOper == GT_STMT);
+ assert(block != nullptr);
+ assert(stmt != nullptr);
compCurBB = block;
compCurStmt = stmt;
- GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);
+ GenTree* morph = fgMorphTree(stmt->gtStmtExpr);
// Bug 1106830 - During the CSE phase we can't just remove
// morph->gtOp.gtOp2 as it could contain CSE expressions.
@@ -15239,7 +15304,7 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons
//
if (!optValnumCSE_phase)
{
- /* Check for morph as a GT_COMMA with an unconditional throw */
+ // Check for morph as a GT_COMMA with an unconditional throw
if (fgIsCommaThrow(morph, true))
{
#ifdef DEBUG
@@ -15251,12 +15316,12 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons
printf("\n");
}
#endif
- /* Use the call as the new stmt */
+ // Use the call as the new stmt
morph = morph->gtOp.gtOp1;
noway_assert(morph->gtOper == GT_CALL);
}
- /* we can get a throw as a statement root*/
+ // we can get a throw as a statement root
if (fgIsThrow(morph))
{
#ifdef DEBUG
@@ -15271,15 +15336,19 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons
}
}
- stmt->gtStmt.gtStmtExpr = morph;
+ stmt->gtStmtExpr = morph;
- /* Can the entire tree be removed ? */
+ if (lvaLocalVarRefCounted)
+ {
+ // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested.
+ lvaRecursiveIncRefCounts(stmt->gtStmtExpr);
+ }
+ // Can the entire tree be removed?
bool removedStmt = fgCheckRemoveStmt(block, stmt);
- /* Or this is the last statement of a conditional branch that was just folded */
-
- if ((!removedStmt) && (stmt->gtNext == nullptr) && !fgRemoveRestOfBlock)
+    // Or is this the last statement of a conditional branch that was just folded?
+ if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock)
{
if (fgFoldConditional(block))
{
@@ -15292,11 +15361,10 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons
if (!removedStmt)
{
- /* Have to re-do the evaluation order since for example
- * some later code does not expect constants as op1 */
+        // Have to re-do the evaluation order since, for example, some later code does not expect constants as op1
gtSetStmtInfo(stmt);
- /* Have to re-link the nodes for this statement */
+ // Have to re-link the nodes for this statement
fgSetStmtSeq(stmt);
}
@@ -15311,18 +15379,13 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons
if (fgRemoveRestOfBlock)
{
- /* Remove the rest of the stmts in the block */
-
- while (stmt->gtNext)
+ // Remove the rest of the stmts in the block
+ for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
{
- stmt = stmt->gtNext;
- noway_assert(stmt->gtOper == GT_STMT);
-
fgRemoveStmt(block, stmt);
}
- // The rest of block has been removed
- // and we will always throw an exception
+    // The rest of the block has been removed and we will always throw an exception.
// Update succesors of block
fgRemoveBlockAsPred(block);
@@ -15368,8 +15431,9 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
fgCurrentlyInUseArgTemps = hashBv::Create(this);
- GenTreePtr stmt, prev;
- for (stmt = block->bbTreeList, prev = nullptr; stmt; prev = stmt->gtStmt.gtStmtExpr, stmt = stmt->gtNext)
+ GenTreeStmt* stmt = block->firstStmt();
+ GenTreePtr prev = nullptr;
+ for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt)
{
noway_assert(stmt->gtOper == GT_STMT);
@@ -15379,8 +15443,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
continue;
}
#ifdef FEATURE_SIMD
- if (!opts.MinOpts() && stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
- stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
+ if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG)
{
fgMorphCombineSIMDFieldAssignments(block, stmt);
}
@@ -15388,7 +15451,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
fgMorphStmt = stmt;
compCurStmt = stmt;
- GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ GenTreePtr tree = stmt->gtStmtExpr;
#ifdef DEBUG
compCurStmtNum++;
@@ -15416,15 +15479,15 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
// Has fgMorphStmt been sneakily changed ?
- if (stmt->gtStmt.gtStmtExpr != tree)
+ if (stmt->gtStmtExpr != tree)
{
/* This must be tailcall. Ignore 'morph' and carry on with
the tail-call node */
- morph = stmt->gtStmt.gtStmtExpr;
+ morph = stmt->gtStmtExpr;
noway_assert(compTailCallUsed);
noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
- noway_assert(stmt->gtNext == nullptr);
+ noway_assert(stmt->gtNextStmt == nullptr);
GenTreeCall* call = morph->AsCall();
// Could either be
@@ -15448,7 +15511,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
noway_assert(compTailCallUsed);
noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
- noway_assert(stmt->gtNext == nullptr);
+ noway_assert(stmt->gtNextStmt == nullptr);
GenTreeCall* call = morph->AsCall();
@@ -15505,7 +15568,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa
fgRemoveRestOfBlock = true;
}
- stmt->gtStmt.gtStmtExpr = tree = morph;
+ stmt->gtStmtExpr = tree = morph;
noway_assert(fgPtrArgCntCur == 0);
@@ -15958,6 +16021,45 @@ void Compiler::fgMorphBlocks()
#endif
}
+//------------------------------------------------------------------------
+// fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements
+//
+// Notes:
+//    fgPtrArgCntMax records the maximum number of pushed arguments.
+//    Depending upon this value we may need to use an EBP frame
+//    or be partially interruptible.
+// This functionality has been factored out of fgSetOptions() because
+// the Rationalizer can create new calls.
+//
+// Assumptions:
+// This must be called before isFramePointerRequired() is called, because it is a
+// phased variable (can only be written before it has been read).
+//
+void Compiler::fgCheckArgCnt()
+{
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
+ "interruptible\n");
+ }
+#endif
+ genInterruptible = false;
+ }
+ if (fgPtrArgCntMax >= sizeof(unsigned))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
+ }
+#endif
+ codeGen->setFramePointerRequired(true);
+ }
+}
+
/*****************************************************************************
*
* Make some decisions about the kind of code to generate.
@@ -15974,13 +16076,11 @@ void Compiler::fgSetOptions()
}
#endif
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgCode)
{
assert(!codeGen->isGCTypeFixed());
genInterruptible = true; // debugging is easier this way ...
}
-#endif
/* Assume we won't need an explicit stack frame if this is allowed */
@@ -16035,32 +16135,7 @@ void Compiler::fgSetOptions()
#endif // _TARGET_X86_
- // fpPtrArgCntMax records the maximum number of pushed arguments
- // Depending upon this value of the maximum number of pushed arguments
- // we may need to use an EBP frame or be partially interuptible
- //
-
- if (!compCanEncodePtrArgCntMax())
- {
-#ifdef DEBUG
- if (verbose)
- {
- printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
- "interruptible\n");
- }
-#endif
- genInterruptible = false;
- }
- if (fgPtrArgCntMax >= sizeof(unsigned))
- {
-#ifdef DEBUG
- if (verbose)
- {
- printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
- }
-#endif
- codeGen->setFramePointerRequiredGCInfo(true);
- }
+ fgCheckArgCnt();
if (info.compCallUnmanaged)
{
@@ -16121,6 +16196,23 @@ GenTreePtr Compiler::fgInitThisClass()
}
else
{
+#ifdef FEATURE_READYTORUN_COMPILER
+ // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
+ if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ memset(&resolvedToken, 0, sizeof(resolvedToken));
+
+ GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
+
+ // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
+ // base of the class that owns the method being compiled". If we're in this method, it means we're not
+ // inlining and there's no ambiguity.
+ return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
+ gtNewArgList(ctxTree), &kind);
+ }
+#endif
+
// Collectible types requires that for shared generic code, if we use the generic context paramter
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
// context parameter is this that we don't need the eager reporting logic.)
@@ -16774,19 +16866,13 @@ void Compiler::fgMorph()
fgRemoveEmptyBlocks();
- /* Add any internal blocks/trees we may need */
-
- fgAddInternal();
-
-#if OPT_BOOL_OPS
- fgMultipleNots = false;
-#endif
-
#ifdef DEBUG
/* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
fgDebugCheckBBlist(false, false);
#endif // DEBUG
+ EndPhase(PHASE_MORPH_INIT);
+
/* Inline */
fgInline();
#if 0
@@ -16796,6 +16882,16 @@ void Compiler::fgMorph()
RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
+ EndPhase(PHASE_MORPH_INLINE);
+
+ /* Add any internal blocks/trees we may need */
+
+ fgAddInternal();
+
+#if OPT_BOOL_OPS
+ fgMultipleNots = false;
+#endif
+
#ifdef DEBUG
/* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
fgDebugCheckBBlist(false, false);
@@ -16804,6 +16900,8 @@ void Compiler::fgMorph()
/* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
fgMarkImplicitByRefArgs();
+ EndPhase(PHASE_MORPH_IMPBYREF);
+
/* Promote struct locals if necessary */
fgPromoteStructs();
@@ -16816,10 +16914,14 @@ void Compiler::fgMorph()
fgStress64RsltMul();
#endif // DEBUG
+ EndPhase(PHASE_STR_ADRLCL);
+
/* Morph the trees in all the blocks of the method */
fgMorphBlocks();
+ EndPhase(PHASE_MORPH_GLOBAL);
+
#if 0
JITDUMP("trees after fgMorphBlocks\n");
DBEXEC(VERBOSE, fgDispBasicBlocks(true));
@@ -17454,9 +17556,6 @@ enum AddrExposedContext
AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
// on more bytes than the width of the storage location addressed. If this is a
// field of a promoted struct local, declare the entire struct local address-taken.
- AXC_InitBlk, // An GT_INITBLK is the immediate parent. The first argument is in an IND context.
- AXC_CopyBlk, // An GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose
- // args should be evaluated in an IND context.
AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND contxt.
// If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
};
@@ -17572,14 +17671,8 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, f
return WALK_CONTINUE;
case GT_LIST:
- if (axc == AXC_InitBlk || axc == AXC_CopyBlk)
- {
- axcStack->Push(axc);
- }
- else
- {
- axcStack->Push(AXC_None);
- }
+ case GT_FIELD_LIST:
+ axcStack->Push(AXC_None);
return WALK_CONTINUE;
case GT_INDEX:
@@ -18083,9 +18176,6 @@ bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
#endif // defined(LEGACY_BACKEND)
}
-// Static variables.
-Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);
-
#ifdef FEATURE_SIMD
//-----------------------------------------------------------------------------------
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
index a73033a91f..1937cc4377 100644
--- a/src/jit/nodeinfo.h
+++ b/src/jit/nodeinfo.h
@@ -21,17 +21,18 @@ public:
_internalIntCount = 0;
_internalFloatCount = 0;
- srcCandsIndex = 0;
- dstCandsIndex = 0;
- internalCandsIndex = 0;
- isLocalDefUse = false;
- isHelperCallWithKills = false;
- isLsraAdded = false;
- isDelayFree = false;
- hasDelayFreeSrc = false;
- isTgtPref = false;
- regOptional = false;
- definesAnyRegisters = false;
+ srcCandsIndex = 0;
+ dstCandsIndex = 0;
+ internalCandsIndex = 0;
+ isLocalDefUse = false;
+ isHelperCallWithKills = false;
+ isLsraAdded = false;
+ isDelayFree = false;
+ hasDelayFreeSrc = false;
+ isTgtPref = false;
+ regOptional = false;
+ definesAnyRegisters = false;
+ isInternalRegDelayFree = false;
#ifdef DEBUG
isInitialized = false;
#endif
@@ -99,42 +100,54 @@ public:
LsraLocation loc;
-private:
- unsigned char _dstCount;
- unsigned char _srcCount;
- unsigned char _internalIntCount;
- unsigned char _internalFloatCount;
-
public:
unsigned char srcCandsIndex;
unsigned char dstCandsIndex;
unsigned char internalCandsIndex;
+private:
+ unsigned char _srcCount : 5;
+ unsigned char _dstCount : 3;
+ unsigned char _internalIntCount : 3;
+ unsigned char _internalFloatCount : 3;
+
+public:
// isLocalDefUse identifies trees that produce a value that is not consumed elsewhere.
// Examples include stack arguments to a call (they are immediately stored), lhs of comma
// nodes, or top-level nodes that are non-void.
unsigned char isLocalDefUse : 1;
+
// isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs.
unsigned char isHelperCallWithKills : 1;
+
// Is this node added by LSRA, e.g. as a resolution or copy/reload move.
unsigned char isLsraAdded : 1;
+
// isDelayFree is set when the register defined by this node will interfere with the destination
// of the consuming node, and therefore it must not be freed immediately after use.
unsigned char isDelayFree : 1;
+
// hasDelayFreeSrc is set when this node has sources that are marked "isDelayFree". This is because,
// we may eventually "contain" this node, in which case we don't want it's children (which have
// already been marked "isDelayFree" to be handled that way when allocating.
unsigned char hasDelayFreeSrc : 1;
+
// isTgtPref is set to true when we have a rmw op, where we would like the result to be allocated
// in the same register as op1.
unsigned char isTgtPref : 1;
+
// Whether a spilled second src can be treated as a contained operand
unsigned char regOptional : 1;
+
// Whether or not a node defines any registers, whether directly (for nodes where dstCout is non-zero)
// or indirectly (for contained nodes, which propagate the transitive closure of the registers
// defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
unsigned char definesAnyRegisters : 1;
+    // Whether an internal register needs to be different from the targetReg
+    // in which the result is produced.
+ unsigned char isInternalRegDelayFree : 1;
+
#ifdef DEBUG
// isInitialized is set when the tree node is handled.
unsigned char isInitialized : 1;
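A consequence of packing the counts into bit-fields above is that they now saturate at small maxima (31 sources, 7 destinations, and 7 internal registers of each kind). A stand-in struct with the same widths, for illustration only:

    #include <cstdio>

    // Same bit-field widths as the TreeNodeInfo change above; field names
    // are simplified stand-ins.
    struct PackedCounts
    {
        unsigned char srcCount : 5;           // 0..31
        unsigned char dstCount : 3;           // 0..7
        unsigned char internalIntCount : 3;   // 0..7
        unsigned char internalFloatCount : 3; // 0..7
    };

    int main()
    {
        printf("max srcCount = %u\n", (1u << 5) - 1); // 31
        printf("max dstCount = %u\n", (1u << 3) - 1); // 7
        return 0;
    }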
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index d23b4cd198..3ff4cea385 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -301,15 +301,15 @@ Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData
if (IS_CSE_INDEX(tree->gtCSEnum))
{
- unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
- EXPSET_TP cseBit = genCSEnum2bit(cseIndex);
+ unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
+ unsigned cseBit = genCSEnum2bit(cseIndex);
if (IS_CSE_DEF(tree->gtCSEnum))
{
- pUserData->CSE_defMask |= cseBit;
+ BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit);
}
else
{
- pUserData->CSE_useMask |= cseBit;
+ BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit);
}
}
@@ -321,8 +321,8 @@ Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData
//
void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
{
- pMaskData->CSE_defMask = 0;
- pMaskData->CSE_useMask = 0;
+ pMaskData->CSE_defMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
+ pMaskData->CSE_useMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
}
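The pattern throughout this file replaces scalar EXPSET_TP mask arithmetic with BitVecOps calls. A rough model of the idiom using std::bitset (illustrative only; the real code goes through cseTraits):

    #include <bitset>

    constexpr size_t kCseBits = 64; // stand-in for the trait size (EXPSET_SZ)
    using ToyExpSet = std::bitset<kCseBits>;

    int main()
    {
        ToyExpSet defMask; // was: EXPSET_TP defMask = 0;
        ToyExpSet useMask;

        defMask.set(3);    // was: defMask |= cseBit;
                           // now: BitVecOps::AddElemD(traits, defMask, 3)
        useMask.set(3);
        useMask.set(7);

        // was: (defMask & useMask) != 0
        // now: !BitVecOps::IsEmptyIntersection(traits, defMask, useMask)
        bool overlap = (defMask & useMask).any();
        return overlap ? 0 : 1; // here they overlap on bit 3
    }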
@@ -355,14 +355,14 @@ bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
optCSE_GetMaskData(op2, &op2MaskData);
// We cannot swap if op1 contains a CSE def that is used by op2
- if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0)
+ if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask))
{
canSwap = false;
}
else
{
// We also cannot swap if op2 contains a CSE def that is used by op1.
- if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0)
+ if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask))
{
canSwap = false;
}
@@ -495,6 +495,14 @@ void Compiler::optValnumCSE_Init()
optCSEtab = nullptr;
#endif
+    // Init traits and full/empty bitvectors. These will be used to track the
+    // individual CSE indexes.
+ cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this);
+ cseFull = BitVecOps::UninitVal();
+ cseEmpty = BitVecOps::UninitVal();
+ BitVecOps::AssignNoCopy(cseTraits, cseFull, BitVecOps::MakeFull(cseTraits));
+ BitVecOps::AssignNoCopy(cseTraits, cseEmpty, BitVecOps::MakeEmpty(cseTraits));
+
/* Allocate and clear the hash bucket table */
optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
@@ -631,8 +639,8 @@ unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
- unsigned CSEindex = ++optCSECandidateCount;
- EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
+ unsigned CSEindex = ++optCSECandidateCount;
+ // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
/* Record the new CSE index in the hashDsc */
hashDsc->csdIndex = CSEindex;
@@ -649,10 +657,11 @@ unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
#ifdef DEBUG
if (verbose)
{
+ EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex));
printf("\nCSE candidate #%02u, vn=", CSEindex);
vnPrint(vnlib, 0);
- printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(genCSEnum2bit(CSEindex)),
- compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz);
+ printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(cseTraits, tempMask), compCurBB->bbNum,
+ tree->gtCostEx, tree->gtCostSz);
gtDispTree(tree);
}
#endif // DEBUG
@@ -773,19 +782,18 @@ void Compiler::optValnumCSE_InitDataFlow()
if (init_to_zero)
{
/* Initialize to {ZERO} prior to dataflow */
-
- block->bbCseIn = 0;
+ block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseEmpty);
}
else
{
/* Initialize to {ALL} prior to dataflow */
-
- block->bbCseIn = EXPSET_ALL;
+ block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull);
}
- block->bbCseOut = EXPSET_ALL;
+
+ block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull);
/* Initialize to {ZERO} prior to locating the CSE candidates */
- block->bbCseGen = 0;
+ block->bbCseGen = BitVecOps::MakeCopy(cseTraits, cseEmpty);
}
// We walk the set of CSE candidates and set the bit corresponsing to the CSEindex
@@ -801,7 +809,7 @@ void Compiler::optValnumCSE_InitDataFlow()
while (lst != nullptr)
{
BasicBlock* block = lst->tslBlock;
- block->bbCseGen |= genCSEnum2bit(CSEindex);
+ BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex));
lst = lst->tslNext;
}
}
@@ -814,7 +822,7 @@ void Compiler::optValnumCSE_InitDataFlow()
bool headerPrinted = false;
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- if (block->bbCseGen != 0)
+ if (block->bbCseGen != nullptr)
{
if (!headerPrinted)
{
@@ -822,7 +830,7 @@ void Compiler::optValnumCSE_InitDataFlow()
headerPrinted = true;
}
printf("BB%02u", block->bbNum);
- printf(" cseGen = %s\n", genES2str(block->bbCseGen));
+ printf(" cseGen = %s\n", genES2str(cseTraits, block->bbCseGen));
}
}
}
@@ -857,21 +865,24 @@ public:
// At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
void StartMerge(BasicBlock* block)
{
- m_preMergeOut = block->bbCseOut;
+ m_preMergeOut = BitVecOps::MakeCopy(m_pCompiler->cseTraits, block->bbCseOut);
}
// During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
{
- block->bbCseIn &= predBlock->bbCseOut;
+ BitVecOps::IntersectionD(m_pCompiler->cseTraits, block->bbCseIn, predBlock->bbCseOut);
}
// At the end of the merge store results of the dataflow equations, in a postmerge state.
bool EndMerge(BasicBlock* block)
{
- EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen);
- block->bbCseOut = mergeOut;
- return (mergeOut != m_preMergeOut);
+ BitVecTraits* traits = m_pCompiler->cseTraits;
+ EXPSET_TP mergeOut = BitVecOps::MakeCopy(traits, block->bbCseIn);
+ BitVecOps::UnionD(traits, mergeOut, block->bbCseGen);
+ BitVecOps::IntersectionD(traits, mergeOut, block->bbCseOut);
+ BitVecOps::Assign(traits, block->bbCseOut, mergeOut);
+ return (!BitVecOps::Equal(traits, mergeOut, m_preMergeOut));
}
};
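In set terms the merge above computes, per block B, in[B] = intersection of out[P] over predecessors P, and then out[B] = out[B] & (in[B] | gen[B]). A tiny sketch of the same equations with std::bitset (illustrative only):

    #include <bitset>

    using ToyExpSet = std::bitset<64>;

    struct ToyBlock
    {
        ToyExpSet cseIn, cseOut, cseGen;
    };

    // Mirrors Merge(): intersect a predecessor's out-set into this block's in-set.
    static void Merge(ToyBlock& block, const ToyBlock& pred)
    {
        block.cseIn &= pred.cseOut;
    }

    // Mirrors EndMerge(): out' = out & (in | gen); returns true if out changed.
    static bool EndMerge(ToyBlock& block)
    {
        ToyExpSet mergeOut = block.cseOut & (block.cseIn | block.cseGen);
        bool      changed  = (mergeOut != block.cseOut);
        block.cseOut       = mergeOut;
        return changed;
    }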
@@ -905,8 +916,8 @@ void Compiler::optValnumCSE_DataFlow()
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
printf("BB%02u", block->bbNum);
- printf(" cseIn = %s", genES2str(block->bbCseIn));
- printf(" cseOut = %s", genES2str(block->bbCseOut));
+ printf(" cseIn = %s", genES2str(cseTraits, block->bbCseIn));
+ printf(" cseOut = %s", genES2str(cseTraits, block->bbCseOut));
printf("\n");
}
@@ -946,7 +957,7 @@ void Compiler::optValnumCSE_Availablity()
compCurBB = block;
- EXPSET_TP available_cses = block->bbCseIn;
+ EXPSET_TP available_cses = BitVecOps::MakeCopy(cseTraits, block->bbCseIn);
optCSEweight = block->getBBWeight(this);
@@ -961,13 +972,13 @@ void Compiler::optValnumCSE_Availablity()
{
if (IS_CSE_INDEX(tree->gtCSEnum))
{
- EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum);
- CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
- unsigned stmw = block->getBBWeight(this);
+ unsigned int cseBit = genCSEnum2bit(tree->gtCSEnum);
+ CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
+ unsigned stmw = block->getBBWeight(this);
/* Is this expression available here? */
- if (available_cses & mask)
+ if (BitVecOps::IsMember(cseTraits, available_cses, cseBit))
{
/* This is a CSE use */
@@ -993,8 +1004,7 @@ void Compiler::optValnumCSE_Availablity()
tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
/* This CSE will be available after this def */
-
- available_cses |= mask;
+ BitVecOps::AddElemD(cseTraits, available_cses, cseBit);
}
#ifdef DEBUG
if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
@@ -1236,6 +1246,7 @@ public:
{
printf("\nSorted CSE candidates:\n");
/* Print out the CSE candidates */
+ EXPSET_TP tempMask;
for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
{
Compiler::CSEdsc* dsc = sortTab[cnt];
@@ -1255,8 +1266,9 @@ public:
use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
}
+ tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex));
printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
- genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use);
+ genES2str(m_pCompiler->cseTraits, tempMask), dsc->csdUseCount, def, use);
printf("] :: ");
m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
}
@@ -2038,7 +2050,7 @@ public:
assert(m_pCompiler->fgRemoveRestOfBlock == false);
/* re-morph the statement */
- m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE"));
+ m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE"));
} while (lst != nullptr);
}
@@ -2516,8 +2528,6 @@ void Compiler::optCleanupCSEs()
//
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- unsigned blkFlags = block->bbFlags;
-
// And clear all the "visited" bits on the block
//
block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 0fbdb27770..bd82f6a6f3 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -822,6 +822,10 @@ bool Compiler::optCheckIterInLoopTest(
if (limitOp->gtOper == GT_CNS_INT)
{
optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
+ if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
+ }
}
else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
{
@@ -1081,9 +1085,24 @@ bool Compiler::optExtractInitTestIncr(
// If it is a duplicated loop condition, skip it.
if (init->gtFlags & GTF_STMT_CMPADD)
{
- // Must be a duplicated loop condition.
- noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
- init = init->gtPrev;
+ bool doGetPrev = true;
+#ifdef DEBUG
+ if (opts.optRepeat)
+ {
+ // Previous optimization passes may have inserted compiler-generated
+ // statements other than duplicated loop conditions.
+ doGetPrev = (init->gtPrev != nullptr);
+ }
+ else
+ {
+ // Must be a duplicated loop condition.
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ }
+#endif // DEBUG
+ if (doGetPrev)
+ {
+ init = init->gtPrev;
+ }
noway_assert(init != nullptr);
}
@@ -1217,10 +1236,14 @@ void Compiler::optRecordLoop(BasicBlock* head,
}
// Make sure the "iterVar" initialization is never skipped,
- // i.e. HEAD dominates the ENTRY.
- if (!fgDominate(head, entry))
+ // i.e. every pred of ENTRY other than HEAD is in the loop.
+ for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
{
- goto DONE_LOOP;
+ BasicBlock* predBlock = predEdge->flBlock;
+ if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
+ {
+ goto DONE_LOOP;
+ }
}
if (!optPopulateInitInfo(loopInd, init, iterVar))
@@ -2798,11 +2821,6 @@ void Compiler::optUnrollLoops()
}
#endif
- if (optCanCloneLoops())
- {
- return;
- }
-
#ifdef DEBUG
if (verbose)
{
@@ -2811,276 +2829,266 @@ void Compiler::optUnrollLoops()
#endif
/* Look for loop unrolling candidates */
- /* Double loop so that after unrolling an inner loop we set change to true
- * and we then go back over all of the loop candidates and try to unroll
- * the next outer loop, until we don't unroll any loops,
- * then change will be false and we are done.
- */
- for (;;)
- {
- bool change = false;
+ bool change = false;
+
+    // Visit loops from highest to lowest number to visit them in innermost
+    // to outermost order.
+ for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
+ {
+ BasicBlock* block;
+ BasicBlock* head;
+ BasicBlock* bottom;
+
+ GenTree* loop;
+ GenTree* test;
+ GenTree* incr;
+ GenTree* phdr;
+ GenTree* init;
+
+ bool dupCond;
+ int lval;
+ int lbeg; // initial value for iterator
+ int llim; // limit value for iterator
+ unsigned lvar; // iterator lclVar #
+ int iterInc; // value to increment the iterator
+ genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
+ var_types iterOperType; // type result of the oper (for overflow instrs)
+ genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
+ bool unsTest; // Is the comparison u/int
+
+ unsigned loopRetCount; // number of BBJ_RETURN blocks in loop
+ unsigned totalIter; // total number of iterations in the constant loop
+ unsigned loopFlags; // actual lpFlags
+ unsigned requiredFlags; // required lpFlags
+
+ static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
+ 10, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 20, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
+ noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+
+ unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
- for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
{
- BasicBlock* block;
- BasicBlock* head;
- BasicBlock* bottom;
-
- GenTree* loop;
- GenTree* test;
- GenTree* incr;
- GenTree* phdr;
- GenTree* init;
-
- bool dupCond;
- int lval;
- int lbeg; // initial value for iterator
- int llim; // limit value for iterator
- unsigned lvar; // iterator lclVar #
- int iterInc; // value to increment the iterator
- genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
- var_types iterOperType; // type result of the oper (for overflow instrs)
- genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
- bool unsTest; // Is the comparison u/int
-
- unsigned totalIter; // total number of iterations in the constant loop
- unsigned loopCostSz; // Cost is size of one iteration
- unsigned loopFlags; // actual lpFlags
- unsigned requiredFlags; // required lpFlags
+ iterLimit *= 10;
+ }
+#endif
- GenTree* loopList; // new stmt list of the unrolled loop
- GenTree* loopLast;
+ static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
+ 300, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 600, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
- static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
- 10, // BLENDED_CODE
- 0, // SMALL_CODE
- 20, // FAST_CODE
- 0 // COUNT_OPT_CODE
- };
+ noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
+ noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
- noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
- noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+ int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
- unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
+ loopFlags = optLoopTable[lnum].lpFlags;
+ // Check for required flags:
+ // LPFLG_DO_WHILE - required because this transform only handles loops of this form
+ // LPFLG_CONST - required because this transform only handles full unrolls
+ // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
+ requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
#ifdef DEBUG
- if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
- {
- iterLimit *= 10;
- }
-#endif
-
- static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
- 30, // BLENDED_CODE
- 0, // SMALL_CODE
- 60, // FAST_CODE
- 0 // COUNT_OPT_CODE
- };
-
- noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
- noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
-
- int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ // In stress mode, quadruple the size limit, and drop
+ // the restriction that loop limit must be Vector<T>.Count.
-#ifdef DEBUG
- if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
- {
- unrollLimitSz *= 10;
- }
+ unrollLimitSz *= 4;
+ requiredFlags &= ~LPFLG_SIMD_LIMIT;
+ }
#endif
- loopFlags = optLoopTable[lnum].lpFlags;
- requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
+ /* Ignore the loop if we don't have a do-while
+ that has a constant number of iterations */
- /* Ignore the loop if we don't have a do-while with a single exit
- that has a constant number of iterations */
-
- if ((loopFlags & requiredFlags) != requiredFlags)
- {
- continue;
- }
+ if ((loopFlags & requiredFlags) != requiredFlags)
+ {
+ continue;
+ }
- /* ignore if removed or marked as not unrollable */
+ /* ignore if removed or marked as not unrollable */
- if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
- {
- continue;
- }
+ if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
+ {
+ continue;
+ }
- head = optLoopTable[lnum].lpHead;
- noway_assert(head);
- bottom = optLoopTable[lnum].lpBottom;
- noway_assert(bottom);
+ head = optLoopTable[lnum].lpHead;
+ noway_assert(head);
+ bottom = optLoopTable[lnum].lpBottom;
+ noway_assert(bottom);
- /* The single exit must be at the bottom of the loop */
- noway_assert(optLoopTable[lnum].lpExit);
- if (optLoopTable[lnum].lpExit != bottom)
- {
- continue;
- }
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ADD, SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
- /* Unrolling loops with jumps in them is not worth the headache
- * Later we might consider unrolling loops after un-switching */
+ lbeg = optLoopTable[lnum].lpConstInit;
+ llim = optLoopTable[lnum].lpConstLimit();
+ testOper = optLoopTable[lnum].lpTestOper();
- block = head;
- do
- {
- block = block->bbNext;
- noway_assert(block);
+ lvar = optLoopTable[lnum].lpIterVar();
+ iterInc = optLoopTable[lnum].lpIterConst();
+ iterOper = optLoopTable[lnum].lpIterOper();
- if (block->bbJumpKind != BBJ_NONE)
- {
- if (block != bottom)
- {
- goto DONE_LOOP;
- }
- }
- } while (block != bottom);
+ iterOperType = optLoopTable[lnum].lpIterOperType();
+ unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
- /* Get the loop data:
- - initial constant
- - limit constant
- - iterator
- - iterator increment
- - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
- - loop test type (i.e. GT_GE, GT_LT, etc...)
- */
+ if (lvaTable[lvar].lvAddrExposed)
+ { // If the loop iteration variable is address-exposed then bail
+ continue;
+ }
+ if (lvaTable[lvar].lvIsStructField)
+ { // If the loop iteration variable is a promoted field from a struct then
+ // bail
+ continue;
+ }
- lbeg = optLoopTable[lnum].lpConstInit;
- llim = optLoopTable[lnum].lpConstLimit();
- testOper = optLoopTable[lnum].lpTestOper();
+ /* Locate the pre-header and initialization and increment/test statements */
- lvar = optLoopTable[lnum].lpIterVar();
- iterInc = optLoopTable[lnum].lpIterConst();
- iterOper = optLoopTable[lnum].lpIterOper();
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ loop = bottom->bbTreeList;
+ noway_assert(loop);
- iterOperType = optLoopTable[lnum].lpIterOperType();
- unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+ init = head->lastStmt();
+ noway_assert(init && (init->gtNext == nullptr));
+ test = bottom->lastStmt();
+ noway_assert(test && (test->gtNext == nullptr));
+ incr = test->gtPrev;
+ noway_assert(incr);
- if (lvaTable[lvar].lvAddrExposed)
- { // If the loop iteration variable is address-exposed then bail
- continue;
- }
- if (lvaTable[lvar].lvIsStructField)
- { // If the loop iteration variable is a promoted field from a struct then
- // bail
- continue;
- }
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ /* Must be a duplicated loop condition */
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
- /* Locate the pre-header and initialization and increment/test statements */
+ dupCond = true;
+ init = init->gtPrev;
+ noway_assert(init);
+ }
+ else
+ {
+ dupCond = false;
+ }
- phdr = head->bbTreeList;
- noway_assert(phdr);
- loop = bottom->bbTreeList;
- noway_assert(loop);
+ /* Find the number of iterations - the function returns false if not a constant number */
- init = head->lastStmt();
- noway_assert(init && (init->gtNext == nullptr));
- test = bottom->lastStmt();
- noway_assert(test && (test->gtNext == nullptr));
- incr = test->gtPrev;
- noway_assert(incr);
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
+ {
+ continue;
+ }
- if (init->gtFlags & GTF_STMT_CMPADD)
- {
- /* Must be a duplicated loop condition */
- noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ /* Forget it if there are too many repetitions or not a constant loop */
- dupCond = true;
- init = init->gtPrev;
- noway_assert(init);
- }
- else
- {
- dupCond = false;
- }
+ if (totalIter > iterLimit)
+ {
+ continue;
+ }
- /* Find the number of iterations - the function returns false if not a constant number */
+ noway_assert(init->gtOper == GT_STMT);
+ init = init->gtStmt.gtStmtExpr;
+ noway_assert(test->gtOper == GT_STMT);
+ test = test->gtStmt.gtStmtExpr;
+ noway_assert(incr->gtOper == GT_STMT);
+ incr = incr->gtStmt.gtStmtExpr;
- if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
- {
- continue;
- }
+ // Don't unroll loops we don't understand.
+ if (incr->gtOper != GT_ASG)
+ {
+ continue;
+ }
+ incr = incr->gtOp.gtOp2;
- /* Forget it if there are too many repetitions or not a constant loop */
+ /* Make sure everything looks ok */
+ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
- if (totalIter > iterLimit)
- {
- continue;
- }
+ !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
- noway_assert(init->gtOper == GT_STMT);
- init = init->gtStmt.gtStmtExpr;
- noway_assert(test->gtOper == GT_STMT);
- test = test->gtStmt.gtStmtExpr;
- noway_assert(incr->gtOper == GT_STMT);
- incr = incr->gtStmt.gtStmtExpr;
+ (test->gtOper != GT_JTRUE))
+ {
+ noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
+ continue;
+ }
- // Don't unroll loops we don't understand.
- if (incr->gtOper == GT_ASG)
- {
- continue;
- }
+ /* heuristic - Estimated cost in code size of the unrolled loop */
- /* Make sure everything looks ok */
- if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
- (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
- (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
+ {
+ ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
- !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
- (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
- (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
+ block = head->bbNext;
+ auto tryIndex = block->bbTryIndex;
- (test->gtOper != GT_JTRUE))
+ loopRetCount = 0;
+ for (;; block = block->bbNext)
{
- noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
- continue;
- }
-
- /* heuristic - Estimated cost in code size of the unrolled loop */
-
- loopCostSz = 0;
-
- block = head;
+ if (block->bbTryIndex != tryIndex)
+ {
+ // Unrolling would require cloning EH regions
+ goto DONE_LOOP;
+ }
- do
- {
- block = block->bbNext;
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ ++loopRetCount;
+ }
/* Visit all the statements in the block */
for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
{
- /* Get the expression and stop if end reached */
-
- GenTreePtr expr = stmt->gtStmtExpr;
- if (expr == incr)
- {
- break;
- }
-
/* Calculate gtCostSz */
gtSetStmtInfo(stmt);
/* Update loopCostSz */
loopCostSz += stmt->gtCostSz;
}
- } while (block != bottom);
+
+ if (block == bottom)
+ {
+ break;
+ }
+ }
+
+#ifdef JIT32_GCENCODER
+ if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
+ {
+ // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
+ goto DONE_LOOP;
+ }
+#endif // JIT32_GCENCODER
/* Compute the estimated increase in code size for the unrolled loop */
- unsigned int fixedLoopCostSz;
- fixedLoopCostSz = 8;
+ ClrSafeInt<unsigned> fixedLoopCostSz(8);
- int unrollCostSz;
- unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
+ ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
+ ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
/* Don't unroll if too much code duplication would result. */
- if (unrollCostSz > unrollLimitSz)
+ if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
{
- /* prevent this loop from being revisited */
- optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
goto DONE_LOOP;
}
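To make the size heuristic concrete, a worked sketch of the arithmetic above (the 300 limit is UNROLL_LIMIT_SZ for BLENDED_CODE; the loop sizes are made up):

    #include <cstdio>

    // unrollCostSz = loopCostSz * totalIter - (loopCostSz + fixedLoopCostSz),
    // i.e. the extra size from emitting totalIter copies of the body instead
    // of one body plus ~8 units of loop overhead.
    int main()
    {
        const unsigned fixedLoopCostSz = 8;
        const int      unrollLimitSz   = 300; // BLENDED_CODE

        unsigned loopCostSz = 40, totalIter = 8;
        int cost = (int)(loopCostSz * totalIter) - (int)(loopCostSz + fixedLoopCostSz);
        printf("cost %d -> %s\n", cost, (cost > unrollLimitSz) ? "reject" : "unroll"); // 272 -> unroll

        loopCostSz = 50;
        cost = (int)(loopCostSz * totalIter) - (int)(loopCostSz + fixedLoopCostSz);
        printf("cost %d -> %s\n", cost, (cost > unrollLimitSz) ? "reject" : "unroll"); // 342 -> reject
        return 0;
    }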
@@ -3100,76 +3108,81 @@ void Compiler::optUnrollLoops()
printf("\n");
}
#endif
+ }
- /* Create the unrolled loop statement list */
-
- loopList = loopLast = nullptr;
+ /* Create the unrolled loop statement list */
+ {
+ BlockToBlockMap blockMap(getAllocator());
+ BasicBlock* insertAfter = bottom;
for (lval = lbeg; totalIter; totalIter--)
{
- block = head;
-
- do
+ for (block = head->bbNext;; block = block->bbNext)
{
- GenTreeStmt* stmt;
- GenTree* expr;
-
- block = block->bbNext;
- noway_assert(block);
+ BasicBlock* newBlock = insertAfter =
+ fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
+ blockMap.Set(block, newBlock);
- /* Visit all the statements in the block */
-
- for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
{
- /* Stop if we've reached the end of the loop */
-
- if (stmt->gtStmtExpr == incr)
- {
- break;
- }
-
- /* Clone/substitute the expression */
-
- expr = gtCloneExpr(stmt, 0, lvar, lval);
-
// cloneExpr doesn't handle everything
+ BasicBlock* oldBottomNext = insertAfter->bbNext;
+ bottom->bbNext = oldBottomNext;
+ oldBottomNext->bbPrev = bottom;
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+ // Block weight should no longer have the loop multiplier
+ newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
+ // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
+ assert(newBlock->bbJumpDest == nullptr);
- if (!expr)
- {
- optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
- goto DONE_LOOP;
- }
-
- /* Append the expression to our list */
-
- if (loopList)
+ if (block == bottom)
+ {
+ // Remove the test; we're doing a full unroll.
+
+ GenTreeStmt* testCopyStmt = newBlock->lastStmt();
+ GenTreePtr testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
+ assert(testCopyExpr->gtOper == GT_JTRUE);
+ GenTreePtr sideEffList = nullptr;
+ gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
+ if (sideEffList == nullptr)
{
- loopLast->gtNext = expr;
+ fgRemoveStmt(newBlock, testCopyStmt);
}
else
{
- loopList = expr;
+ testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
}
+ newBlock->bbJumpKind = BBJ_NONE;
- expr->gtPrev = loopLast;
- loopLast = expr;
+ // Exit this loop; we've walked all the blocks.
+ break;
}
- } while (block != bottom);
+ }
+
+ // Now redirect any branches within the newly-cloned iteration
+ for (block = head->bbNext; block != bottom; block = block->bbNext)
+ {
+ BasicBlock* newBlock = blockMap[block];
+ optCopyBlkDest(block, newBlock);
+ optRedirectBlock(newBlock, &blockMap);
+ }
/* update the new value for the unrolled iterator */
switch (iterOper)
{
- case GT_ASG_ADD:
+ case GT_ADD:
lval += iterInc;
break;
- case GT_ASG_SUB:
+ case GT_SUB:
lval -= iterInc;
break;
- case GT_ASG_RSH:
- case GT_ASG_LSH:
+ case GT_RSH:
+ case GT_LSH:
noway_assert(!"Unrolling not implemented for this loop iterator");
goto DONE_LOOP;
@@ -3179,46 +3192,22 @@ void Compiler::optUnrollLoops()
}
}
- /* Finish the linked list */
-
- if (loopList)
+ // Gut the old loop body
+ for (block = head->bbNext;; block = block->bbNext)
{
- loopList->gtPrev = loopLast;
- loopLast->gtNext = nullptr;
- }
-
- /* Replace the body with the unrolled one */
-
- block = head;
-
- do
- {
- block = block->bbNext;
- noway_assert(block);
block->bbTreeList = nullptr;
block->bbJumpKind = BBJ_NONE;
- block->bbFlags &= ~BBF_NEEDS_GCPOLL;
- } while (block != bottom);
-
- bottom->bbJumpKind = BBJ_NONE;
- bottom->bbTreeList = loopList;
- bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
- bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
-
- bool dummy;
-
- fgMorphStmts(bottom, &dummy, &dummy, &dummy);
-
- /* Update bbRefs and bbPreds */
- /* Here head->bbNext is bottom !!! - Replace it */
-
- fgRemoveRefPred(head->bbNext, bottom);
-
- /* Now change the initialization statement in the HEAD to "lvar = lval;"
- * (the last value of the iterator in the loop)
- * and drop the jump condition since the unrolled loop will always execute */
+ block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
+ if (block->bbJumpDest != nullptr)
+ {
+ block->bbJumpDest = nullptr;
+ }
- init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
+ if (block == bottom)
+ {
+ break;
+ }
+ }
/* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
@@ -3240,10 +3229,6 @@ void Compiler::optUnrollLoops()
phdr->gtPrev = init;
head->bbJumpKind = BBJ_NONE;
head->bbFlags &= ~BBF_NEEDS_GCPOLL;
-
- /* Update bbRefs and bbPreds */
-
- fgRemoveRefPred(head->bbJumpDest, head);
}
else
{
@@ -3256,18 +3241,9 @@ void Compiler::optUnrollLoops()
{
printf("Whole unrolled loop:\n");
- GenTreePtr s = loopList;
-
- while (s)
- {
- noway_assert(s->gtOper == GT_STMT);
- gtDispTree(s);
- s = s->gtNext;
- }
- printf("\n");
-
gtDispTree(init);
printf("\n");
+ fgDumpTrees(head->bbNext, insertAfter);
}
#endif
@@ -3278,22 +3254,25 @@ void Compiler::optUnrollLoops()
/* Make sure to update loop table */
/* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly
- * (also make head and bottom NULL - to hit an assert or GPF) */
+ * (also make head and bottom NULL - to hit an assert or GPF) */
optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
- DONE_LOOP:;
+ // Note if we created new BBJ_RETURNs
+ fgReturnCount += loopRetCount * (totalIter - 1);
}
- if (!change)
- {
- break;
- }
+ DONE_LOOP:;
+ }
+
+ if (change)
+ {
+ fgUpdateChangedFlowGraph();
}
#ifdef DEBUG
- fgDebugCheckBBlist();
+ fgDebugCheckBBlist(true);
#endif
}
#ifdef _PREFAST_
@@ -3639,12 +3618,10 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
-#ifdef DEBUGGING_SUPPORT
if (opts.compDbgInfo)
{
copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
}
-#endif
// Flag the block that received the copy as potentially having an array/vtable
// reference if the block copied from did; this is a conservative guess.
@@ -4265,7 +4242,7 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore
GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
GenTreePtr stmt = fgNewStmtFromTree(logCall);
fgInsertStmtBefore(block, insertBefore, stmt);
- fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
+ fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning"));
}
#endif
@@ -4394,14 +4371,18 @@ bool Compiler::optIsLoopClonable(unsigned loopInd)
}
// We've previously made a decision whether to have separate return epilogs, or branch to one.
- // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
- // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
- // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
- if (fgReturnCount + loopRetCount > 4)
+ // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate
+ // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited
+ // (or return blocks should not be considered part of the loop, rendering this issue moot).
+ unsigned epilogLimit = 4;
+#ifdef JIT32_GCENCODER
+ epilogLimit = SET_EPILOGCNT_MAX;
+#endif // JIT32_GCENCODER
+ if (fgReturnCount + loopRetCount > epilogLimit)
{
JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
- "would exceed the limit of 4.\n",
- loopRetCount, fgReturnCount);
+ "would exceed the limit of %d.\n",
+ loopRetCount, fgReturnCount, epilogLimit);
return false;
}
@@ -4642,7 +4623,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
/*extendRegion*/ true);
- BasicBlock::CloneBlockState(this, newBlk, blk);
+ // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
+ // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
+ // checked them to guarantee they are clonable.
+ bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
+ noway_assert(cloneOk);
// TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
// the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
// loop, if one exists -- the parent of the loop we're cloning.
@@ -4716,6 +4701,12 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
}
assert(foundIt && e2 != nullptr);
+ // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to
+ // initialize the loop counter immediately before entering the loop, but we've left a shared
+ // initialization of the loop counter up above the test that determines which version of the
+ // loop to take.
+ optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL;
+
fgUpdateChangedFlowGraph();
}
@@ -6226,9 +6217,28 @@ bool Compiler::optHoistLoopExprsForTree(
// be hoisted so that they are evaluated in the same order as they would have been in the loop,
// and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
// here, since that includes exceptions.)
- if (tree->gtFlags & GTF_CALL)
+ if (tree->IsCall())
{
- *pFirstBlockAndBeforeSideEffect = false;
+ // If it's a call, it must be a helper call that does not mutate the heap.
+ // Further, if it may run a cctor, it must be labeled as "Hoistable"
+ // (meaning it won't run a cctor because the class is not precise-init).
+ GenTreeCall* call = tree->AsCall();
+ if (call->gtCallType != CT_HELPER)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (s_helperCallProperties.MutatesHeap(helpFunc))
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
}
else if (tree->OperIsAssignment())
{
@@ -6748,15 +6758,17 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum)
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
{
- unsigned lnum = blk->bbNatLoopNum;
- while (lnum != BasicBlock::NOT_IN_LOOP)
+ for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
{
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
if (optLoopTable[lnum].lpEntry == blk)
{
*pLnum = lnum;
return true;
}
- lnum = optLoopTable[lnum].lpParent;
}
return false;
}
@@ -7239,7 +7251,7 @@ void Compiler::optRemoveRangeCheck(
noway_assert(stmt->gtOper == GT_STMT);
noway_assert(tree->gtOper == GT_COMMA);
- noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
+ noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
diff --git a/src/jit/protojit/CMakeLists.txt b/src/jit/protojit/CMakeLists.txt
index e3cc769ba0..91c69e9a83 100644
--- a/src/jit/protojit/CMakeLists.txt
+++ b/src/jit/protojit/CMakeLists.txt
@@ -3,8 +3,13 @@ project(protojit)
add_definitions(-DALT_JIT)
add_definitions(-DFEATURE_NO_HOST)
add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=protojit.dll)
+endif(WIN32)
+
add_library_clr(protojit
SHARED
${SHARED_LIB_SOURCES}
@@ -28,7 +33,8 @@ if(CLR_CMAKE_PLATFORM_UNIX)
)
else()
list(APPEND RYUJIT_LINK_LIBRARIES
- msvcrt.lib
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
kernel32.lib
advapi32.lib
ole32.lib
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
index ae0c792f11..8d16cce31a 100644
--- a/src/jit/rangecheck.cpp
+++ b/src/jit/rangecheck.cpp
@@ -208,7 +208,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreeP
// If we are not looking at array bounds check, bail.
GenTreePtr tree = treeParent->gtOp.gtOp1;
- if (tree->gtOper != GT_ARR_BOUNDS_CHECK)
+ if (!tree->OperIsBoundsCheck())
{
return;
}
@@ -233,6 +233,9 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreeP
}
}
else
+#ifdef FEATURE_SIMD
+ if (tree->gtOper != GT_SIMD_CHK)
+#endif // FEATURE_SIMD
{
arrSize = GetArrLength(arrLenVn);
}
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 03e0c9a27e..7f5a26fa1f 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -16,44 +16,6 @@ struct SplitData
Rationalizer* thisPhase;
};
-//------------------------------------------------------------------------------
-// isNodeCallArg - given a context (stack of parent nodes), determine if the TOS is an arg to a call
-//------------------------------------------------------------------------------
-
-GenTree* isNodeCallArg(ArrayStack<GenTree*>* parentStack)
-{
- for (int i = 1; // 0 is current node, so start at 1
- i < parentStack->Height(); i++)
- {
- GenTree* node = parentStack->Index(i);
- switch (node->OperGet())
- {
- case GT_LIST:
- case GT_ARGPLACE:
- break;
- case GT_NOP:
- // Currently there's an issue when the rationalizer performs
- // the fixup of a call argument: the case is when we remove an
- // inserted NOP as a parent of a call introduced by fgMorph;
- // when then the rationalizer removes it, the tree stack in the
- // walk is not consistent with the node it was just deleted, so the
- // solution is just to go 1 level deeper.
- // TODO-Cleanup: This has to be fixed in a proper way: make the rationalizer
- // correctly modify the evaluation stack when removing treenodes.
- if (node->gtOp.gtOp1->gtOper == GT_CALL)
- {
- return node->gtOp.gtOp1;
- }
- break;
- case GT_CALL:
- return node;
- default:
- return nullptr;
- }
- }
- return nullptr;
-}
-
// return op that is the store equivalent of the given load opcode
genTreeOps storeForm(genTreeOps loadForm)
{
@@ -109,54 +71,6 @@ void copyFlags(GenTree* dst, GenTree* src, unsigned mask)
dst->gtFlags |= (src->gtFlags & mask);
}
-// call args have other pointers to them which must be fixed up if
-// they are replaced
-void Compiler::fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild)
-{
- GenTree* parentCall = isNodeCallArg(parentStack);
- if (!parentCall)
- {
- return;
- }
-
- // we have replaced an arg, so update pointers in argtable
- fgFixupArgTabEntryPtr(parentCall, oldChild, newChild);
-}
-
-//------------------------------------------------------------------------
-// fgFixupArgTabEntryPtr: Fixup the fgArgTabEntryPtr of parentCall after
-// replacing oldArg with newArg
-//
-// Arguments:
-// parentCall - a pointer to the parent call node
-// oldArg - the original argument node
-// newArg - the replacement argument node
-//
-
-void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg)
-{
- assert(parentCall != nullptr);
- assert(oldArg != nullptr);
- assert(newArg != nullptr);
-
- JITDUMP("parent call was :\n");
- DISPNODE(parentCall);
-
- JITDUMP("old child was :\n");
- DISPNODE(oldArg);
-
- if (oldArg->gtFlags & GTF_LATE_ARG)
- {
- newArg->gtFlags |= GTF_LATE_ARG;
- }
- else
- {
- fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldArg);
- assert(fp->node == oldArg);
- fp->node = newArg;
- }
-}
-
// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple
// lclVar if possible.
//
@@ -191,8 +105,8 @@ void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk)
return;
}
- // If the operand of is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be of simdType,
- // replace obj by GT_LCL_VAR.
+ // If we have GT_IND(GT_LCL_VAR_ADDR) and the GT_LCL_VAR_ADDR is TYP_BYREF/TYP_I_IMPL,
+ // and the var is a SIMD type, replace the expression by GT_LCL_VAR.
GenTree* addr = tree->AsIndir()->Addr();
if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr))
{
@@ -202,6 +116,17 @@ void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk)
addr->gtType = simdType;
use.ReplaceWith(comp, addr);
}
+#if defined(_TARGET_X86_)
+ // For x86, if we have GT_IND(GT_ADDR(GT_SIMD)), remove the GT_IND(GT_ADDR()), leaving just
+ // the GT_SIMD.
+ else if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_SIMD))
+ {
+ BlockRange().Remove(tree);
+ BlockRange().Remove(addr);
+
+ use.ReplaceWith(comp, addr->gtGetOp1());
+ }
+#endif // defined(_TARGET_X86_)
else if (!keepBlk)
{
tree->SetOper(GT_IND);
@@ -242,13 +167,32 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
// Create the call node
GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args);
- call = comp->fgMorphArgs(call);
+
+#if DEBUG
+ CORINFO_SIG_INFO sig;
+ comp->eeGetMethodSig(callHnd, &sig);
+ assert(JITtype2varType(sig.retType) == tree->gtType);
+#endif // DEBUG
+
+ call = comp->fgMorphArgs(call);
+ // Determine if this call has changed any codegen requirements.
+ comp->fgCheckArgCnt();
+
#ifdef FEATURE_READYTORUN_COMPILER
call->gtCall.setEntryPoint(entryPoint);
#endif
// Replace "tree" with "call"
- *use = call;
+ if (data->parentStack->Height() > 1)
+ {
+ data->parentStack->Index(1)->ReplaceOperand(use, call);
+ }
+ else
+ {
+ // If there's no parent, the tree being replaced is the root of the
+ // statement (and no special handling is necessary).
+ *use = call;
+ }
// Rebuild the evaluation order.
comp->gtSetStmtInfo(root);
@@ -278,8 +222,6 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
treeNextNode->gtPrev = treeLastNode;
}
- comp->fgFixupIfCallArg(data->parentStack, tree, call);
-
// Propagate flags of "call" to its parents.
// 0 is current node, so start at 1
for (int i = 1; i < data->parentStack->Height(); i++)
@@ -510,33 +452,77 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
genTreeOps locationOp = location->OperGet();
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
+ if (assignment->OperIsBlkOp())
{
- if (location->OperGet() == GT_LCL_VAR)
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
{
- var_types simdType = location->TypeGet();
- GenTree* initVal = assignment->gtOp.gtOp2;
- var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
- if (baseType != TYP_UNKNOWN)
+ if (location->OperGet() == GT_LCL_VAR)
{
- GenTreeSIMD* simdTree = new (comp, GT_SIMD)
- GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
- assignment->gtOp.gtOp2 = simdTree;
- value = simdTree;
- initVal->gtNext = simdTree;
- simdTree->gtPrev = initVal;
-
- simdTree->gtNext = location;
- location->gtPrev = simdTree;
+ var_types simdType = location->TypeGet();
+ GenTree* initVal = assignment->gtOp.gtOp2;
+ var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
+ if (baseType != TYP_UNKNOWN)
+ {
+ GenTreeSIMD* simdTree = new (comp, GT_SIMD)
+ GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
+ assignment->gtOp.gtOp2 = simdTree;
+ value = simdTree;
+ initVal->gtNext = simdTree;
+ simdTree->gtPrev = initVal;
+
+ simdTree->gtNext = location;
+ location->gtPrev = simdTree;
+ }
}
}
- else
+#endif // FEATURE_SIMD
+ if ((location->TypeGet() == TYP_STRUCT) && !assignment->IsPhiDefn() && !value->IsMultiRegCall())
{
- assert(location->OperIsBlk());
+ if ((location->OperGet() == GT_LCL_VAR))
+ {
+ // We need to construct a block node for the location.
+ // Modify lcl to be the address form.
+ location->SetOper(addrForm(locationOp));
+ LclVarDsc* varDsc = &(comp->lvaTable[location->AsLclVarCommon()->gtLclNum]);
+ location->gtType = TYP_BYREF;
+ GenTreeBlk* storeBlk = nullptr;
+ unsigned int size = varDsc->lvExactSize;
+
+ if (varDsc->lvStructGcCount != 0)
+ {
+ CORINFO_CLASS_HANDLE structHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ GenTreeObj* objNode = comp->gtNewObjNode(structHnd, location)->AsObj();
+ unsigned int slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+
+ objNode->SetGCInfo(varDsc->lvGcLayout, varDsc->lvStructGcCount, slots);
+ objNode->ChangeOper(GT_STORE_OBJ);
+ objNode->SetData(value);
+ comp->fgMorphUnsafeBlk(objNode);
+ storeBlk = objNode;
+ }
+ else
+ {
+ storeBlk = new (comp, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, location, value, size);
+ }
+ storeBlk->gtFlags |= (GTF_REVERSE_OPS | GTF_ASG);
+ storeBlk->gtFlags |= ((location->gtFlags | value->gtFlags) & GTF_ALL_EFFECT);
+
+ GenTree* insertionPoint = location->gtNext;
+ BlockRange().InsertBefore(insertionPoint, storeBlk);
+ use.ReplaceWith(comp, storeBlk);
+ BlockRange().Remove(assignment);
+ JITDUMP("After transforming local struct assignment into a block op:\n");
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+ return;
+ }
+ else
+ {
+ assert(location->OperIsBlk());
+ }
}
}
-#endif // FEATURE_SIMD
switch (locationOp)
{
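For the struct-assignment rewrite above, the effect is that a struct-typed "lcl = value" becomes a store through the local's address, sized by lvExactSize, with a GC-aware GT_STORE_OBJ used when the struct contains GC references and a plain GT_STORE_BLK otherwise. The following is only a rough standalone analogy of the non-GC case; NoGcFields, RawBlockCopy, and assignStruct are invented names for this sketch.

#include <cstring>

struct NoGcFields { int a; float b; };            // no GC pointers, so a plain block store suffices

void RawBlockCopy(void* dst, const void* src, size_t size)
{
    std::memcpy(dst, src, size);                  // analogue of GT_STORE_BLK: an opaque byte copy
}

void assignStruct(NoGcFields& location, const NoGcFields& value)
{
    // The rationalizer's version takes the address of 'location' (the lclVar is
    // retyped to TYP_BYREF) and emits a block store of lvExactSize bytes.
    RawBlockCopy(&location, &value, sizeof(NoGcFields));
}

int main()
{
    NoGcFields src{1, 2.0f}, dst{};
    assignStruct(dst, src);
    return dst.a == 1 ? 0 : 1;
}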
@@ -605,10 +591,10 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
}
JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
GenTree::NodeName(storeOper));
- storeBlk->gtOper = storeOper;
+ storeBlk->SetOperRaw(storeOper);
storeBlk->gtFlags &= ~GTF_DONT_CSE;
storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE |
- GTF_BLK_UNALIGNED | GTF_BLK_INIT | GTF_DONT_CSE));
+ GTF_BLK_UNALIGNED | GTF_DONT_CSE));
storeBlk->gtBlk.Data() = value;
// Replace the assignment node with the store
@@ -693,21 +679,20 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0;
#endif
- // First, remove any preceeding GT_LIST nodes, which are not otherwise visited by the tree walk.
+ // First, remove any preceding list nodes, which are not otherwise visited by the tree walk.
//
- // NOTE: GT_LIST nodes that are used as aggregates, by block ops, and by phi nodes will in fact be visited.
- for (GenTree* prev = node->gtPrev;
- prev != nullptr && prev->OperGet() == GT_LIST && !(prev->AsArgList()->IsAggregate());
- prev = node->gtPrev)
+ // NOTE: GT_FIELD_LIST head nodes, and GT_LIST nodes used by phi nodes will in fact be visited.
+ for (GenTree* prev = node->gtPrev; prev != nullptr && prev->OperIsAnyList() && !(prev->OperIsFieldListHead());
+ prev = node->gtPrev)
{
BlockRange().Remove(prev);
}
// In addition, remove the current node if it is a GT_LIST node that is not an aggregate.
- if (node->OperGet() == GT_LIST)
+ if (node->OperIsAnyList())
{
GenTreeArgList* list = node->AsArgList();
- if (!list->IsAggregate())
+ if (!list->OperIsFieldListHead())
{
BlockRange().Remove(list);
}
@@ -741,6 +726,11 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
RewriteAddress(use);
break;
+ case GT_IND:
+ // Clear the `GTF_IND_ASG_LHS` flag, which overlaps with `GTF_IND_REQ_ADDR_IN_REG`.
+ node->gtFlags &= ~GTF_IND_ASG_LHS;
+ break;
+
case GT_NOP:
// fgMorph sometimes inserts NOP nodes between defs and uses
// supposedly 'to prevent constant folding'. In this case, remove the
@@ -931,19 +921,27 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
#endif // FEATURE_SIMD
default:
+ // JCC nodes should not be present in HIR.
+ assert(node->OperGet() != GT_JCC);
break;
}
// Do some extra processing on top-level nodes to remove unused local reads.
- if (use.IsDummyUse() && node->OperIsLocalRead())
+ if (node->OperIsLocalRead())
{
- assert((node->gtFlags & GTF_ALL_EFFECT) == 0);
-
- comp->lvaDecRefCnts(node);
- BlockRange().Remove(node);
+ if (use.IsDummyUse())
+ {
+ comp->lvaDecRefCnts(node);
+ BlockRange().Remove(node);
+ }
+ else
+ {
+ // Local reads are side-effect-free; clear any flags leftover from frontend transformations.
+ node->gtFlags &= ~GTF_ALL_EFFECT;
+ }
}
- assert(isLateArg == ((node->gtFlags & GTF_LATE_ARG) != 0));
+ assert(isLateArg == ((use.Def()->gtFlags & GTF_LATE_ARG) != 0));
return Compiler::WALK_CONTINUE;
}
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 9dd7299906..8a7ad5a163 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -53,8 +53,6 @@ regMaskTP Compiler::raConfigRestrictMaskFP()
return result;
}
-#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
-
#if DOUBLE_ALIGN
DWORD Compiler::getCanDoubleAlign()
{
@@ -67,8 +65,84 @@ DWORD Compiler::getCanDoubleAlign()
return DEFAULT_DOUBLE_ALIGN;
#endif
}
+
+//------------------------------------------------------------------------
+// shouldDoubleAlign: Determine whether to double-align the frame
+//
+// Arguments:
+// refCntStk - sum of ref counts for all stack based variables
+// refCntEBP - sum of ref counts for EBP enregistered variables
+// refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables
+// refCntStkParam - sum of ref counts for all stack based parameters
+// refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs
+// with double fields).
+//
+// Return Value:
+// Returns true if this method estimates that a double-aligned frame would be beneficial
+//
+// Notes:
+// The impact of a double-aligned frame is computed as follows:
+// - We save a byte of code for each parameter reference (they are frame-pointer relative)
+// - We pay a byte of code for each non-parameter stack reference.
+// - We save the misalignment penalty and possible cache-line crossing penalty.
+// This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise.
+// - We pay 7 extra bytes for:
+// MOV EBP,ESP,
+// LEA ESP,[EBP-offset]
+// AND ESP,-8 to double align ESP
+// - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP).
+//
+// If the misalignment penalty is estimated to be less than the bytes used, we don't double align.
+// Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
+// ref count for double-aligned values.
+//
+bool Compiler::shouldDoubleAlign(
+ unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
+{
+ bool doDoubleAlign = false;
+ const unsigned DBL_ALIGN_SETUP_SIZE = 7;
+
+ unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
+ unsigned misaligned_weight = 4;
+
+ if (compCodeOpt() == Compiler::SMALL_CODE)
+ misaligned_weight = 0;
+
+ if (compCodeOpt() == Compiler::FAST_CODE)
+ misaligned_weight *= 4;
+
+ JITDUMP("\nDouble alignment:\n");
+ JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
+ JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
+ JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+
+ if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+ {
+ JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
+ }
+ else if (refCntWtdEBP > refCntWtdStkDbl * 2)
+ {
+ // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
+ // not double aligned.
+ // Here are the numbers that make this not double-aligned.
+ // refCntWtdStkDbl = 0x164
+ // refCntWtdEBP = 0x1a4
+ // We think we do need to change the heuristic to be in favor of double-align.
+
+ JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n");
+ }
+ else
+ {
+ // OK we passed all of the benefit tests, so we'll predict a double aligned frame.
+ JITDUMP(" Predicting to create a double-aligned frame\n");
+ doDoubleAlign = true;
+ }
+ return doDoubleAlign;
+}
#endif // DOUBLE_ALIGN
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
void Compiler::raInit()
{
#if FEATURE_STACK_FP_X87
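As a rough standalone sketch of the shouldDoubleAlign heuristic introduced above, assuming a unity block weight of 100 (what BB_UNITY_WEIGHT is expected to be); the ref-count inputs in main are made-up example values in the spirit of the SciMark.FFT note.

#include <cstdio>

enum CodeOpt { SMALL_CODE, FAST_CODE, BLENDED_CODE };

bool shouldDoubleAlignSketch(unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP,
                             unsigned refCntStkParam, unsigned refCntWtdStkDbl, CodeOpt opt)
{
    const unsigned unityWeight = 100;      // assumed BB_UNITY_WEIGHT
    const unsigned setupSize   = 7;        // MOV EBP,ESP + LEA ESP,[EBP-offset] + AND ESP,-8

    unsigned bytesUsed        = refCntStk + refCntEBP - refCntStkParam + setupSize;
    unsigned misalignedWeight = (opt == SMALL_CODE) ? 0 : (opt == FAST_CODE) ? 16 : 4;

    if (bytesUsed > (refCntWtdStkDbl * misalignedWeight) / unityWeight)
        return false;                      // extra code size outweighs the alignment benefit
    if (refCntWtdEBP > refCntWtdStkDbl * 2)
        return false;                      // better to keep EBP free to enregister variables
    return true;                           // predict a double-aligned frame
}

int main()
{
    // Example numbers loosely modeled on the refCntWtdStkDbl/refCntWtdEBP pair quoted above.
    printf("double align: %d\n", shouldDoubleAlignSketch(50, 10, 0x1a4, 20, 0x164, BLENDED_CODE));
    return 0;
}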
@@ -2415,12 +2489,6 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
{
case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
- regMask = 0;
- goto RETURN_CHECK;
- }
/* Is the value being assigned into a LCL_VAR? */
if (op1->gtOper == GT_LCL_VAR)
{
@@ -2486,6 +2554,12 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
}
}
}
+ else if (tree->OperIsBlkOp())
+ {
+ interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
__fallthrough;
case GT_CHS:
@@ -4384,6 +4458,13 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
case GT_ARR_LENGTH:
goto GENERIC_UNARY;
+ case GT_INIT_VAL:
+ // This unary operator simply passes through the value from its child (much like GT_NOP)
+ // and thus won't need a scratch register.
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
default:
#ifdef DEBUG
gtDispTree(tree);
@@ -4525,7 +4606,7 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
- assert(list->IsList());
+ assert(list->OperIsList());
args = list->Current();
list = list->Rest();
@@ -5840,114 +5921,14 @@ regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
{
- /* OK, there may be some benefit to double-aligning the frame */
- /* But let us compare the benefits vs. the costs of this */
-
- /*
- One cost to consider is the benefit of smaller code
- when using EBP as a frame pointer register
-
- Each stack variable reference is an extra byte of code
- if we use a double-aligned frame, parameters are
- accessed via EBP for a double-aligned frame so they
- don't use an extra byte of code.
-
- We pay one byte of code for each refCntStk and we pay
- one byte or more for each refCntEBP but we save one
- byte for each refCntStkParam.
-
- Our savings are the elimination of a possible misaligned
- access and a possible DCU spilt when an access crossed
- a cache-line boundry.
-
- We use the loop weighted value of
- refCntWtdStkDbl * misaligned_weight (0, 4, 16)
- to represent this savings.
- */
-
- // We also pay 7 extra bytes for the MOV EBP,ESP,
- // LEA ESP,[EBP-0x10] and the AND ESP,-8 to double align ESP
- const unsigned DBL_ALIGN_SETUP_SIZE = 7;
-
- unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
- unsigned misaligned_weight = 4;
-
- if (compCodeOpt() == SMALL_CODE)
- misaligned_weight = 0;
-
- if (compCodeOpt() == FAST_CODE)
- misaligned_weight *= 4;
-
-#ifdef DEBUG
- if (verbose)
- {
- printf("; Double alignment:\n");
- printf("; Bytes that could be save by not using EBP frame: %i\n", bytesUsed);
- printf("; Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
- printf("; Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
- }
-#endif
-
- if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
- {
- /* It's probably better to use EBP as a frame pointer */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- if (verbose)
- printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
-#endif
- goto NO_DOUBLE_ALIGN;
- }
-
- /*
- Another cost to consider is the benefit of using EBP to enregister
- one or more integer variables
-
- We pay one extra memory reference for each refCntWtdEBP
-
- Our savings are the elimination of a possible misaligned
- access and a possible DCU spilt when an access crossed
- a cache-line boundry.
-
- */
-
- // <BUGNUM>
- // VSW 346717: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
- // not double aligned.
- // Here are the numbers that make this not double-aligned.
- // refCntWtdStkDbl = 0x164
- // refCntWtdEBP = 0x1a4
- // We think we do need to change the heuristic to be in favor of double-align.
- // </BUGNUM>
-
- if (refCntWtdEBP > refCntWtdStkDbl * 2)
+ if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl))
{
- /* It's probably better to use EBP to enregister integer variables */
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef DEBUG
- if (verbose)
- printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
-#endif
- goto NO_DOUBLE_ALIGN;
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
}
-
-#ifdef DEBUG
- if (verbose)
- printf("; Predicting to create a double-aligned frame\n");
-#endif
- /*
- OK we passed all of the benefit tests
- so we'll predict a double aligned frame
- */
-
- rpFrameType = FT_DOUBLE_ALIGN_FRAME;
- goto REVERSE_EBP_ENREG;
}
}
-NO_DOUBLE_ALIGN:
#endif // DOUBLE_ALIGN
if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
@@ -6673,8 +6654,6 @@ void Compiler::raMarkStkVars()
#endif // FEATURE_FIXED_OUT_ARGS
-#ifdef DEBUGGING_SUPPORT
-
#ifdef DEBUG
/* For debugging, note that we have to reserve space even for
unused variables if they are ever in scope. However, this is not
@@ -6709,7 +6688,6 @@ void Compiler::raMarkStkVars()
varDsc->lvMustInit = true;
}
}
-#endif // DEBUGGING_SUPPORT
#ifndef LEGACY_BACKEND
varDsc->lvOnFrame = needSlot;
diff --git a/src/jit/regalloc.h b/src/jit/regalloc.h
index 7e2d7c7eb1..5054b4568e 100644
--- a/src/jit/regalloc.h
+++ b/src/jit/regalloc.h
@@ -17,6 +17,18 @@ enum FrameType
#endif
};
+#if DOUBLE_ALIGN
+enum CanDoubleAlign
+{
+ CANT_DOUBLE_ALIGN,
+ CAN_DOUBLE_ALIGN,
+ MUST_DOUBLE_ALIGN,
+ COUNT_DOUBLE_ALIGN,
+
+ DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN
+};
+#endif
+
#ifdef LEGACY_BACKEND
#include "varset.h"
@@ -94,18 +106,6 @@ enum rpPredictReg
#endif // _TARGET_
};
-#if DOUBLE_ALIGN
-enum CanDoubleAlign
-{
- CANT_DOUBLE_ALIGN,
- CAN_DOUBLE_ALIGN,
- MUST_DOUBLE_ALIGN,
- COUNT_DOUBLE_ALIGN,
-
- DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN
-};
-#endif
-
#endif // LEGACY_BACKEND
#endif // REGALLOC_H_
diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp
index 997c223ed4..3a3143e629 100644
--- a/src/jit/registerfp.cpp
+++ b/src/jit/registerfp.cpp
@@ -326,10 +326,8 @@ void CodeGen::genFloatAssign(GenTree* tree)
bool unaligned = false; // Is this an unaligned store
regNumber op2reg = REG_NA;
-#ifdef DEBUGGING_SUPPORT
unsigned lclVarNum = compiler->lvaCount;
unsigned lclILoffs = DUMMY_INIT(0);
-#endif
noway_assert(tree->OperGet() == GT_ASG);
@@ -358,7 +356,6 @@ void CodeGen::genFloatAssign(GenTree* tree)
noway_assert(varNum < compiler->lvaCount);
varDsc = compiler->lvaTable + varNum;
-#ifdef DEBUGGING_SUPPORT
// For non-debuggable code, every definition of a lcl-var has
// to be checked to see if we need to open a new scope for it.
// Remember the local var info to call siCheckVarScope
@@ -369,7 +366,6 @@ void CodeGen::genFloatAssign(GenTree* tree)
lclVarNum = varNum;
lclILoffs = op1->gtLclVar.gtLclILoffs;
}
-#endif
// Dead Store assert (with min opts we may have dead stores)
//
@@ -607,13 +603,11 @@ DONE_ASG:
genUpdateLife(tree);
-#ifdef DEBUGGING_SUPPORT
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
*/
if (lclVarNum < compiler->lvaCount)
siCheckVarScope(lclVarNum, lclILoffs);
-#endif
}
void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp
index 2980f96813..0d0ac3e0ce 100644
--- a/src/jit/regset.cpp
+++ b/src/jit/regset.cpp
@@ -3175,6 +3175,16 @@ var_types Compiler::tmpNormalizeType(var_types type)
type = genActualType(type);
+#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+ // For SIMD on 32-bit platforms, we always spill SIMD12 to a 16-byte SIMD16 temp.
+ // This is because we don't have a single instruction to store 12 bytes. We also
+ // allocate non-argument locals as 16 bytes; see lvSize().
+ if (type == TYP_SIMD12)
+ {
+ type = TYP_SIMD16;
+ }
+#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
+
#else // LEGACY_BACKEND
if (!varTypeIsGC(type))
{
diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp
index f2a7902317..29c18f941c 100644
--- a/src/jit/scopeinfo.cpp
+++ b/src/jit/scopeinfo.cpp
@@ -58,10 +58,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
#include "codegen.h"
-/*****************************************************************************/
-#ifdef DEBUGGING_SUPPORT
-/*****************************************************************************/
-
bool Compiler::siVarLoc::vlIsInReg(regNumber reg)
{
switch (vlType)
@@ -1050,7 +1046,6 @@ void CodeGen::psiBegProlog()
void CodeGen::psiAdjustStackLevel(unsigned size)
{
-#ifdef DEBUGGING_SUPPORT
if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
{
return;
@@ -1082,7 +1077,6 @@ void CodeGen::psiAdjustStackLevel(unsigned size)
}
#endif // ACCURATE_PROLOG_DEBUG_INFO
-#endif // DEBUGGING_SUPPORT
}
/*****************************************************************************
@@ -1094,7 +1088,6 @@ void CodeGen::psiAdjustStackLevel(unsigned size)
void CodeGen::psiMoveESPtoEBP()
{
-#ifdef DEBUGGING_SUPPORT
if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
{
return;
@@ -1127,7 +1120,6 @@ void CodeGen::psiMoveESPtoEBP()
}
#endif // ACCURATE_PROLOG_DEBUG_INFO
-#endif // DEBUGGING_SUPPORT
}
/*****************************************************************************
@@ -1141,7 +1133,6 @@ void CodeGen::psiMoveESPtoEBP()
void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg)
{
-#ifdef DEBUGGING_SUPPORT
assert(compiler->compGeneratingProlog);
if (!compiler->opts.compScopeInfo)
@@ -1195,7 +1186,6 @@ void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg)
!"Parameter scope not found (Assert doesnt always indicate error)");
#endif // ACCURATE_PROLOG_DEBUG_INFO
-#endif // DEBUGGING_SUPPORT
}
/*****************************************************************************
@@ -1207,7 +1197,6 @@ void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg)
void CodeGen::psiMoveToStack(unsigned varNum)
{
-#ifdef DEBUGGING_SUPPORT
if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
{
return;
@@ -1248,7 +1237,6 @@ void CodeGen::psiMoveToStack(unsigned varNum)
!"Parameter scope not found (Assert doesnt always indicate error)");
#endif // ACCURATE_PROLOG_DEBUG_INFO
-#endif // DEBUGGING_SUPPORT
}
/*****************************************************************************
@@ -1264,8 +1252,4 @@ void CodeGen::psiEndProlog()
{
psiEndPrologScope(scope);
}
-}
-
-/*****************************************************************************/
-#endif // DEBUGGING_SUPPORT
-/*****************************************************************************/
+}
\ No newline at end of file
diff --git a/src/jit/sideeffects.h b/src/jit/sideeffects.h
index 33fac16f05..e14b2925ed 100644
--- a/src/jit/sideeffects.h
+++ b/src/jit/sideeffects.h
@@ -136,6 +136,12 @@ public:
// SideEffectSet:
// Represents a set of side effects for the purposes of analyzing code
// motion.
+// Note that for non-fixed-size frames without a frame pointer (currently
+// x86-only), we don't track the modification of the stack level that occurs
+// with a GT_PUTARG_STK as a side-effect. If we ever support general code
+// reordering, that would have to be taken into account. As it happens,
+// we currently do not reorder any other side-effecting nodes relative to
+// these.
//
class SideEffectSet final
{
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 1f0c867b55..39664c47bf 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -77,10 +77,10 @@ int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd)
//
int Compiler::getSIMDTypeAlignment(var_types simdType)
{
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// Fixed length vectors have the following alignment preference
- // Vector2/3 = 8 byte alignment
- // Vector4 = 16-byte alignment
+ // Vector2 = 8-byte alignment
+ // Vector3/4 = 16-byte alignment
unsigned size = genTypeSize(simdType);
// preferred alignment for SSE2 128-bit vectors is 16-bytes
@@ -88,13 +88,16 @@ int Compiler::getSIMDTypeAlignment(var_types simdType)
{
return 8;
}
-
- // As per Intel manual, AVX vectors preferred alignment is 32-bytes but on Amd64
- // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even
- // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in
- // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX
- // 256-bit vectors with unaligned load/stores to/from memory.
- return 16;
+ else if (size <= 16)
+ {
+ assert((size == 12) || (size == 16));
+ return 16;
+ }
+ else
+ {
+ assert(size == 32);
+ return 32;
+ }
#else
assert(!"getSIMDTypeAlignment() unimplemented on target arch");
unreached();
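A compact standalone restatement of the xarch size-to-alignment mapping above; simdAlignmentForSize is an invented name, and the sketch ignores the non-xarch path.

#include <cassert>

int simdAlignmentForSize(unsigned sizeBytes)
{
    if (sizeBytes <= 8)                            // Vector2 (two floats)
        return 8;
    if (sizeBytes <= 16)                           // Vector3/Vector4/Vector<T> on SSE2
    {
        assert(sizeBytes == 12 || sizeBytes == 16);
        return 16;
    }
    assert(sizeBytes == 32);                       // Vector<T> on AVX
    return 32;
}

int main()
{
    assert(simdAlignmentForSize(8) == 8);
    assert(simdAlignmentForSize(12) == 16);
    assert(simdAlignmentForSize(32) == 32);
    return 0;
}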
@@ -391,7 +394,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd;
*baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
- bool isHWAcceleratedIntrinsic = false;
if (typeHnd == SIMDVectorHandle)
{
// All of the supported intrinsics on this static class take a first argument that's a vector,
@@ -424,6 +426,16 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
return nullptr;
}
+#ifdef _TARGET_X86_
+ // NYI: support LONG type SIMD intrinsics. Need support in long decomposition.
+ // (Don't use NYI fallback mechanism; just call the function.)
+ if ((*baseType == TYP_LONG) || (*baseType == TYP_ULONG))
+ {
+ JITDUMP("NYI: x86 long base type SIMD intrinsics\n");
+ return nullptr;
+ }
+#endif // _TARGET_X86_
+
// account for implicit "this" arg
*argCount = sig->numArgs;
if (sig->hasThis())
@@ -525,7 +537,8 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
// We don't check anything in that case.
if (!isThisPtr || !isNewObj)
{
- GenTreePtr arg = impStackTop(stackIndex).val;
+ GenTreePtr arg = impStackTop(stackIndex).val;
+ var_types argType = arg->TypeGet();
var_types expectedArgType;
if (argIndex < fixedArgCnt)
@@ -540,6 +553,7 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
{
// The type of the argument will be genActualType(*baseType).
expectedArgType = genActualType(*baseType);
+ argType = genActualType(argType);
}
}
else
@@ -547,7 +561,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
expectedArgType = *baseType;
}
- var_types argType = arg->TypeGet();
if (!isThisPtr && argType == TYP_I_IMPL)
{
// The reference implementation has a constructor that takes a pointer.
@@ -715,7 +728,7 @@ GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, var_types baseType, u
return simdTree;
}
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// impSIMDLongRelOpEqual: transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain == comparison result.
//
@@ -741,7 +754,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
//
// Equality(v1, v2):
// tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int>
- // result = BitwiseAnd(t, shuffle(t, (2, 3, 1 0)))
+ // result = BitwiseAnd(t, shuffle(t, (2, 3, 0, 1)))
// Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements.
// Compare vector<long> as if they were vector<int> and assign the result to a temp
@@ -755,7 +768,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
// op2 = Shuffle(tmp, 0xB1)
// IntrinsicId = BitwiseAnd
*pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp);
- *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT),
+ *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWXY, TYP_INT),
SIMDIntrinsicShuffleSSE2, TYP_INT, size);
return SIMDIntrinsicBitwiseAnd;
}
@@ -971,7 +984,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
return SIMDIntrinsicBitwiseOr;
}
-#endif //_TARGET_AMD64_
+#endif // _TARGET_XARCH_
// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain given relop result.
@@ -999,7 +1012,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
assert(isRelOpSIMDIntrinsic(relOpIntrinsicId));
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
SIMDIntrinsicID intrinsicID = relOpIntrinsicId;
var_types baseType = *inOutBaseType;
@@ -1076,7 +1089,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
//
// We need to treat op1 and op2 as signed for comparison purpose after
// the transformation.
- ssize_t constVal = 0;
+ __int64 constVal = 0;
switch (baseType)
{
case TYP_UBYTE:
@@ -1105,9 +1118,19 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
if (intrinsicID != SIMDIntrinsicEqual)
{
// For constructing const vector use either long or int base type.
- var_types tempBaseType = (baseType == TYP_ULONG) ? TYP_LONG : TYP_INT;
- GenTree* initVal = gtNewIconNode(constVal);
- initVal->gtType = tempBaseType;
+ var_types tempBaseType;
+ GenTree* initVal;
+ if (baseType == TYP_ULONG)
+ {
+ tempBaseType = TYP_LONG;
+ initVal = gtNewLconNode(constVal);
+ }
+ else
+ {
+ tempBaseType = TYP_INT;
+ initVal = gtNewIconNode((ssize_t)constVal);
+ }
+ initVal->gtType = tempBaseType;
GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size);
// Assign constVector to a temp, since we intend to use it more than once
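The operand transformation above relies on the usual sign-bit trick: XORing both operands with the sign bit lets a signed vector compare produce the unsigned ordering, with constVal broadcast into every element of the constant vector. A scalar, standalone illustration follows, assuming the TYP_UINT case uses 0x80000000 as that constant.

#include <cassert>
#include <cstdint>

bool unsignedGreaterViaSigned(uint32_t a, uint32_t b)
{
    const uint32_t bias = 0x80000000u;  // assumed constVal for a TYP_UINT base type
    return (int32_t)(a ^ bias) > (int32_t)(b ^ bias);
}

int main()
{
    const uint32_t vals[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
    for (uint32_t a : vals)
        for (uint32_t b : vals)
            assert(unsignedGreaterViaSigned(a, b) == (a > b));
    return 0;
}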
@@ -1127,10 +1150,10 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
}
return intrinsicID;
-#else
+#else // !_TARGET_XARCH_
assert(!"impSIMDRelOp() unimplemented on target arch");
unreached();
-#endif //_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
// Creates a GT_SIMD tree for Select operation
@@ -1210,7 +1233,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
var_types simdType = op1->TypeGet();
assert(op2->TypeGet() == simdType);
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// SSE2 has direct support for float/double/signed word/unsigned byte.
// For other integer types we compute min/max as follows
//
@@ -1347,10 +1370,10 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
assert(simdTree != nullptr);
return simdTree;
-#else
+#else // !_TARGET_XARCH_
assert(!"impSIMDMinMax() unimplemented on target arch");
unreached();
-#endif //_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
//------------------------------------------------------------------------
@@ -1791,6 +1814,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
int length = getSIMDVectorLength(clsHnd);
GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, length);
retVal = intConstTree;
+
+ intConstTree->gtFlags |= GTF_ICON_SIMD_COUNT;
}
break;
@@ -2223,7 +2248,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
assert(op2->TypeGet() == simdType);
simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
@@ -2234,7 +2263,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
op2 = impSIMDPopStack(simdType);
op1 = impSIMDPopStack(simdType, instMethod);
simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
@@ -2262,7 +2295,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicBitwiseOr:
case SIMDIntrinsicBitwiseXor:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
+#if defined(_TARGET_XARCH_) && defined(DEBUG)
// check for the cases where we don't support intrinsics.
// This check should be done before we make modifications to type stack.
// Note that this is more of a double safety check for robustness since
@@ -2290,7 +2323,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}
}
-#endif //_TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_ && DEBUG
// op1 is the first operand; if instance method, op1 is "this" arg
// op2 is the second operand
@@ -2331,9 +2364,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
{
// op1 is a SIMD variable that is "this" arg
// op2 is an index of TYP_INT
- op2 = impSIMDPopStack(TYP_INT);
- op1 = impSIMDPopStack(simdType, instMethod);
- unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+ op2 = impSIMDPopStack(TYP_INT);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ int vectorLength = getSIMDVectorLength(size, baseType);
if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength)
{
// We need to bounds-check the length of the vector.
@@ -2366,15 +2399,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
case SIMDIntrinsicDotProduct:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
- // Right now dot product is supported only for float vectors.
- // See SIMDIntrinsicList.h for supported base types for this intrinsic.
- if (!varTypeIsFloating(baseType))
+#if defined(_TARGET_XARCH_)
+ // Right now dot product is supported only for float/double vectors and
+ // int vectors on SSE4/AVX.
+ if (!varTypeIsFloating(baseType) &&
+ !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4))
{
- assert(!"Dot product on integer type vectors not supported");
return nullptr;
}
-#endif //_TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_
// op1 is a SIMD variable that is the first source and also "this" arg.
// op2 is a SIMD variable which is the second source.
@@ -2382,13 +2415,17 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
op1 = impSIMDPopStack(simdType, instMethod);
simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size);
- retVal = simdTree;
+ if (simdType == TYP_SIMD12)
+ {
+ simdTree->gtFlags |= GTF_SIMD12_OP;
+ }
+ retVal = simdTree;
}
break;
case SIMDIntrinsicSqrt:
{
-#if defined(_TARGET_AMD64_) && defined(DEBUG)
+#if defined(_TARGET_XARCH_) && defined(DEBUG)
// SSE/AVX doesn't support sqrt on integer type vectors and hence
// should never be seen as an intrinsic here. See SIMDIntrinsicList.h
// for supported base types for this intrinsic.
@@ -2397,7 +2434,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
assert(!"Sqrt not supported on integer vectors\n");
return nullptr;
}
-#endif // _TARGET_AMD64_ && DEBUG
+#endif // _TARGET_XARCH_ && DEBUG
op1 = impSIMDPopStack(simdType);
@@ -2409,7 +2446,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
{
op1 = impSIMDPopStack(simdType);
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
if (varTypeIsFloating(baseType))
{
// Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
@@ -2448,10 +2485,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
unreached();
}
-#else //!_TARGET_AMD64_
- assert(!"Abs intrinsic on non-Amd64 target not implemented");
+#else // !_TARGET_XARCH_
+ assert(!"Abs intrinsic on non-xarch target not implemented");
unreached();
-#endif //!_TARGET_AMD64_
+#endif // !_TARGET_XARCH_
}
break;
@@ -2524,15 +2561,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}
-#ifdef _TARGET_AMD64_
- // Amd64: also indicate that we use floating point registers.
+#ifdef _TARGET_XARCH_
+ // XArch: also indicate that we use floating point registers.
// The need for setting this here is that a method may not have SIMD
// type lclvars, but might be exercising SIMD intrinsics on fields of
// SIMD type.
//
// e.g. public Vector<float> ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; }
compFloatingPointUsed = true;
-#endif
+#endif // _TARGET_XARCH_
// At this point, we have a tree that we are going to store into a destination.
// TODO-1stClassStructs: This should be a simple store or assignment, and should not require
diff --git a/src/jit/simd.h b/src/jit/simd.h
index c68899e412..c4a8866b07 100644
--- a/src/jit/simd.h
+++ b/src/jit/simd.h
@@ -29,13 +29,18 @@ struct SIMDIntrinsicInfo
var_types supportedBaseTypes[SIMD_INTRINSIC_MAX_BASETYPE_COUNT];
};
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// SSE2 Shuffle control byte to shuffle vector <W, Z, Y, X>
// These correspond to shuffle immediate byte in shufps SSE2 instruction.
-#define SHUFFLE_XXXX 0x00
-#define SHUFFLE_ZWYX 0xB1
-#define SHUFFLE_WWYY 0xF5
-#define SHUFFLE_ZZXX 0xA0
+#define SHUFFLE_XXXX 0x00 // 00 00 00 00
+#define SHUFFLE_XXWW 0x0F // 00 00 11 11
+#define SHUFFLE_XYZW 0x1B // 00 01 10 11
+#define SHUFFLE_YXYX 0x44 // 01 00 01 00
+#define SHUFFLE_YYZZ 0x5A // 01 01 10 10
+#define SHUFFLE_ZXXY 0x81 // 10 00 00 01
+#define SHUFFLE_ZWXY 0xB1 // 10 11 00 01
+#define SHUFFLE_WWYY 0xF5 // 11 11 01 01
+#define SHUFFLE_ZZXX 0xA0 // 10 10 00 00
#endif
#endif // FEATURE_SIMD
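The new SHUFFLE_* values follow the shufps/pshufd immediate encoding: each 2-bit field selects the source element (X=0, Y=1, Z=2, W=3) for one destination lane, with the letters in the macro name running from the highest lane down to the lowest. A standalone sketch that reproduces a couple of the constants above:

#include <cstdio>

constexpr unsigned shuffleImm(unsigned lane3, unsigned lane2, unsigned lane1, unsigned lane0)
{
    // Bits [7:6] pick the source for destination lane 3, down to bits [1:0] for lane 0.
    return (lane3 << 6) | (lane2 << 4) | (lane1 << 2) | lane0;
}

enum : unsigned { X = 0, Y = 1, Z = 2, W = 3 };

static_assert(shuffleImm(Z, W, X, Y) == 0xB1, "SHUFFLE_ZWXY swaps each adjacent 32-bit pair");
static_assert(shuffleImm(X, Y, Z, W) == 0x1B, "SHUFFLE_XYZW reverses the four lanes");

int main()
{
    printf("SHUFFLE_ZWXY = 0x%02X\n", shuffleImm(Z, W, X, Y));
    return 0;
}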
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index 702f967aad..ec933fd5d7 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -17,7 +17,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
#include "emit.h"
#include "codegen.h"
#include "sideeffects.h"
@@ -62,7 +62,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
// AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory.
// AVX2 supports broadcast instructions to populate YMM reg with a single value from memory or mm reg.
// If we decide to use AVX2 only, we can remove this assert.
- if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0)
+ if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
}
@@ -205,12 +205,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pmullw;
}
- else if (compiler->canUseAVX())
+ else if ((baseType == TYP_INT) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
{
- if (baseType == TYP_INT)
- {
- result = INS_pmulld;
- }
+ result = INS_pmulld;
}
break;
@@ -300,7 +297,8 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pcmpeqb;
}
- else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG))
+ else if ((baseType == TYP_ULONG || baseType == TYP_LONG) &&
+ (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
{
result = INS_pcmpeqq;
}
@@ -359,7 +357,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pcmpgtb;
}
- else if (compiler->canUseAVX() && (baseType == TYP_LONG))
+ else if ((baseType == TYP_LONG) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4))
{
result = INS_pcmpgtq;
}
@@ -464,7 +462,8 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
// to target mm reg, zeroing out the upper bits if and only if specified.
//
// Arguments:
-// type the type of value to be moved
+// targetType the target type
+// baseType the base type of value to be moved
// targetReg the target reg
// srcReg the src reg
// moveType action to be performed on target upper bits
@@ -475,10 +474,10 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
// Notes:
// This is currently only supported for floating point types.
//
-void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
+void CodeGen::genSIMDScalarMove(
+ var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
{
- var_types targetType = compiler->getSIMDVectorType();
- assert(varTypeIsFloating(type));
+ assert(varTypeIsFloating(baseType));
#ifdef FEATURE_AVX_SUPPORT
if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
{
@@ -487,17 +486,17 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s
case SMT_PreserveUpper:
if (srcReg != targetReg)
{
- instruction ins = ins_Store(type);
+ instruction ins = ins_Store(baseType);
if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
{
// In general, when we use a three-operands move instruction, we want to merge the src with
// itself. This is an exception in that we actually want the "merge" behavior, so we must
// specify it with all 3 operands.
- inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
+ inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(baseType));
}
else
{
- inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
}
}
break;
@@ -516,9 +515,9 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s
case SMT_ZeroInitUpper_SrcHasUpperZeros:
if (srcReg != targetReg)
{
- instruction ins = ins_Copy(type);
+ instruction ins = ins_Copy(baseType);
assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins));
- inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
}
break;
@@ -536,7 +535,7 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s
case SMT_PreserveUpper:
if (srcReg != targetReg)
{
- inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ inst_RV_RV(ins_Store(baseType), targetReg, srcReg, baseType, emitTypeSize(baseType));
}
break;
@@ -545,22 +544,22 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s
{
// There is no guarantee that upper bits of op1Reg are zero.
// We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes.
- instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type);
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
- ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
}
else
{
genSIMDZero(targetType, TYP_FLOAT, targetReg);
- inst_RV_RV(ins_Store(type), targetReg, srcReg);
+ inst_RV_RV(ins_Store(baseType), targetReg, srcReg);
}
break;
case SMT_ZeroInitUpper_SrcHasUpperZeros:
if (srcReg != targetReg)
{
- inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ inst_RV_RV(ins_Copy(baseType), targetReg, srcReg, baseType, emitTypeSize(baseType));
}
break;
@@ -676,7 +675,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
SIMDScalarMoveType moveType =
op1->IsCnsFltOrDbl() || op1->isMemoryOp() ? SMT_ZeroInitUpper_SrcHasUpperZeros : SMT_ZeroInitUpper;
- genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, moveType);
+ genSIMDScalarMove(targetType, TYP_FLOAT, targetReg, op1Reg, moveType);
if (size == 8)
{
@@ -786,7 +785,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
{
getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize);
}
- genSIMDScalarMove(baseType, vectorReg, operandReg, SMT_PreserveUpper);
+ genSIMDScalarMove(targetType, baseType, vectorReg, operandReg, SMT_PreserveUpper);
offset += baseTypeSize;
}
@@ -1033,11 +1032,10 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
{
- GenTree* op1 = simdNode->gtGetOp1();
- GenTree* op2 = simdNode->gtGetOp2();
- var_types baseType = simdNode->gtSIMDBaseType;
- regNumber targetReg = simdNode->gtRegNum;
- assert(targetReg != REG_NA);
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
var_types targetType = simdNode->TypeGet();
InstructionSet iset = compiler->getSIMDInstructionSet();
@@ -1051,8 +1049,16 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
case SIMDIntrinsicEqual:
case SIMDIntrinsicGreaterThan:
{
- // SSE2: vector<(u)long> relation op should be implemented in terms of TYP_INT comparison operations
- assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) && (baseType != TYP_ULONG));
+ assert(targetReg != REG_NA);
+
+#ifdef DEBUG
+ // SSE2: vector<(u)long> relational op should be implemented in terms of
+ // TYP_INT comparison operations
+ if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ assert(iset >= InstructionSet_SSE3_4);
+ }
+#endif
// Greater-than: Floating point vectors use "<" with swapped operands
if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan)
@@ -1093,6 +1099,8 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
case SIMDIntrinsicLessThan:
case SIMDIntrinsicLessThanOrEqual:
{
+ assert(targetReg != REG_NA);
+
// Int vectors use ">" and ">=" with swapped operands
assert(varTypeIsFloating(baseType));
@@ -1115,17 +1123,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
case SIMDIntrinsicOpEquality:
case SIMDIntrinsicOpInEquality:
{
- assert(genIsValidIntReg(targetReg));
-
- // We need two additional XMM register as scratch
- assert(simdNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(simdNode->gtRsvdRegs) == 2);
-
- regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
- regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
var_types simdType = op1->TypeGet();
// TODO-1stClassStructs: Temporary to minimize asmDiffs
if (simdType == TYP_DOUBLE)
@@ -1140,96 +1137,111 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
simdType = TYP_SIMD16;
}
- // tmpReg1 = (op1Reg == op2Reg)
- // Call this value of tmpReg1 as 'compResult' for further reference below.
- regNumber otherReg = op2Reg;
- if (tmpReg1 != op2Reg)
+ // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest.
+ if ((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
{
- if (tmpReg1 != op1Reg)
- {
- inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType));
- }
+ assert(op2->isContained());
+ inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType));
}
else
{
- otherReg = op1Reg;
- }
+ // We need one additional SIMD register to store the result of the SIMD compare.
+ regNumber tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLFLOAT);
- // For all integer types we can use TYP_INT comparison.
- unsigned ival = 0;
- instruction ins =
- getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
+ // tmpReg1 = (op1Reg == op2Reg)
+ // Call this value of tmpReg1 as 'compResult' for further reference below.
+ regNumber otherReg = op2Reg;
+ if (tmpReg1 != op2Reg)
+ {
+ if (tmpReg1 != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType));
+ }
+ }
+ else
+ {
+ otherReg = op1Reg;
+ }
- if (varTypeIsFloating(baseType))
- {
- getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival);
- }
- else
- {
- inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
+ // For all integer types we can use TYP_INT comparison.
+ unsigned ival = 0;
+ instruction ins =
+ getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
+
+ if (varTypeIsFloating(baseType))
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival);
+ }
+ else
+ {
+ inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ regNumber intReg;
+ if (targetReg == REG_NA)
+ {
+ // If we are not materializing result into a register,
+ // we would have reserved an int type internal register.
+ intReg = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLINT);
+ }
+ else
+ {
+ // We can use targetReg for setting flags.
+ intReg = targetReg;
+
+ // Must not have reserved any int type internal registers.
+ assert(genCountBits(simdNode->gtRsvdRegs & RBM_ALLINT) == 0);
+ }
+
+ inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType));
+ // There's no pmovmskw/pmovmskd/pmovmskq but they're not needed anyway. Vector compare
+ // instructions produce "all ones"/"all zeroes" components and pmovmskb extracts a
+ // subset of each component's ones/zeroes. In the end we need to know if the result is
+ // "all ones" where the number of ones is given by the vector byte size, not by the
+ // vector component count. So, for AVX registers we need to compare to 0xFFFFFFFF and
+ // for SSE registers we need to compare to 0x0000FFFF.
+ // The SIMD12 case is handled specially, because we can't rely on the upper bytes being
+ // zero, so we must compare only the lower 3 floats (hence the byte mask of 0xFFF).
+ // Note that -1 is used instead of 0xFFFFFFFF because on x64 the emitter doesn't
+ // recognize that 0xFFFFFFFF fits in a sign-extended byte and emits the longer
+ // 3DFFFFFFFF encoding instead of 83F8FF.
+ ssize_t mask;
+ if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
+ {
+ mask = 0x00000FFF;
+ getEmitter()->emitIns_R_I(INS_and, EA_4BYTE, intReg, mask);
+ }
+ else if (emitActualTypeSize(simdType) == 32)
+ {
+ mask = -1;
+ }
+ else
+ {
+ mask = 0x0000FFFF;
+ }
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask);
}
- // If we have 32 bytes, start by anding the two 16-byte halves to get a 16-byte result.
- if (compiler->canUseAVX() && (simdType == TYP_SIMD32))
+ if (targetReg != REG_NA)
{
- // Reduce tmpReg1 from 256-bits to 128-bits bitwise-Anding the lower and uppper 128-bits
+ // If we need to materialize result into a register, targetReg needs to
+ // be set to 1 on true and zero on false.
+ // Equality:
+ // cmp targetReg, 0xFFFFFFFF or 0xFFFF
+ // sete targetReg
+ // movzx targetReg, targetReg
//
- // Generated code sequence
- // - vextractf128 tmpReg2, tmpReg1, 0x01
- // tmpReg2[128..255] <- 0
- // tmpReg2[0..127] <- tmpReg1[128..255]
- // - vandps tmpReg1, tempReg2
- // This will zero-out upper portion of tmpReg1 and
- // lower portion of tmpReg1 is and of upper and lower 128-bit comparison result.
- getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01);
- inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
- }
- // Next, if we have more than 8 bytes, and the two 8-byte halves to get a 8-byte result.
- if (simdType != TYP_SIMD8)
- {
- // tmpReg2 = Shuffle(tmpReg1, (1,0,3,2))
- // Note: vpshufd is a 128-bit only instruction. Therefore, explicitly pass EA_16BYTE
- getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x4E);
-
- // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
+ // InEquality:
+ // cmp targetReg, 0xFFFFFFFF or 0xFFFF
+ // setne targetReg
+ // movzx targetReg, targetReg
//
- // Note that what we have computed is as follows at this point:
- // tmpReg1[0] = compResult[0] & compResult[2]
- // tmpReg1[1] = compResult[1] & compResult[3]
- inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
+ assert(simdNode->TypeGet() == TYP_INT);
+ inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg,
+ TYP_INT, EA_1BYTE);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
}
- // At this point, we have either reduced the result to 8 bytes: tmpReg1[0] and tmpReg1[1],
- // OR we have a Vector2 (TYP_SIMD8) in tmpReg1, which has only those two fields.
-
- // tmpReg2 = Shuffle(tmpReg1, (0,0,0,1))
- // tmpReg2[0] = compResult[1] & compResult[3]
- getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x1);
-
- // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
- // That is tmpReg1[0] = compResult[0] & compResult[1] & compResult[2] & compResult[3]
- inst_RV_RV(INS_pand, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); // ??? INS_andps??
-
- // targetReg = lower 32-bits of tmpReg1 = compResult[0] & compResult[1] & compResult[2] & compResult[3]
- // (Note that for mov_xmm2i, the int register is always in the reg2 position.
- inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT);
-
- // Since we need to compute a bool result, targetReg needs to be set to 1 on true and zero on false.
- // Equality:
- // cmp targetReg, 0xFFFFFFFF
- // sete targetReg
- // movzx targetReg, targetReg
- //
- // InEquality:
- // cmp targetReg, 0xFFFFFFFF
- // setne targetReg
- // movzx targetReg, targetReg
- //
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0xFFFFFFFF);
- inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT,
- EA_1BYTE);
- assert(simdNode->TypeGet() == TYP_INT);
- // Set the higher bytes to 0
- inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
}
break;
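As an aside (not part of the diff), the pmovmskb-based reduction described in the comments above can be illustrated with a small standalone sketch built on the matching SSE2 intrinsics; the function names and driver are illustrative only, and the ptest and 32-byte AVX paths are omitted.

#include <emmintrin.h> // SSE2; _mm_cmpeq_ps comes from xmmintrin.h, pulled in here
#include <cstdio>

// Vector4 equality: the compare makes each equal lane all ones, pmovmskb
// (_mm_movemask_epi8) collects one bit per byte, so all 16 bytes equal -> 0xFFFF.
bool Vector4Equals(__m128 a, __m128 b)
{
    __m128 cmp = _mm_cmpeq_ps(a, b);
    int mask   = _mm_movemask_epi8(_mm_castps_si128(cmp));
    return mask == 0x0000FFFF;
}

// Vector3 (SIMD12) equality: the upper 4 bytes are not reliable, so only the
// lower 12 byte-mask bits are tested (the 0xFFF case above).
bool Vector3Equals(__m128 a, __m128 b)
{
    __m128 cmp = _mm_cmpeq_ps(a, b);
    int mask   = _mm_movemask_epi8(_mm_castps_si128(cmp)) & 0x00000FFF;
    return mask == 0x00000FFF;
}

int main()
{
    __m128 x = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
    printf("%d %d\n", Vector4Equals(x, x), Vector3Equals(x, x)); // prints: 1 1
}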
@@ -1267,45 +1279,68 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
regNumber targetReg = simdNode->gtRegNum;
assert(targetReg != REG_NA);
- // DotProduct is only supported on floating point types.
var_types targetType = simdNode->TypeGet();
assert(targetType == baseType);
- assert(varTypeIsFloating(baseType));
genConsumeOperands(simdNode);
- regNumber op1Reg = op1->gtRegNum;
- regNumber op2Reg = op2->gtRegNum;
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+ regNumber tmpReg1 = REG_NA;
+ regNumber tmpReg2 = REG_NA;
- regNumber tmpReg = REG_NA;
- // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch.
- // However, it must be distinct from targetReg, so we request two from the register allocator.
- // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
- if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+
+ // Dot product intrinsic is supported only on float/double vectors and on
+ // int vectors (16-byte on SSE4, 32-byte on AVX).
+ //
+ // Float/Double Vectors:
+ // For SSE, or AVX with 32-byte vectors, we need one additional Xmm register
+ // different from targetReg as scratch. Note that if this is a TYP_SIMD16 or
+ // smaller on AVX, then we don't need a tmpReg.
+ //
+ // 32-byte integer vector on AVX: we need two additional Xmm registers
+ // different from targetReg as scratch.
+ //
+ // 16-byte integer vector on SSE4: we need one additional Xmm register
+ // different from targetReg as scratch.
+ if (varTypeIsFloating(baseType))
{
- assert(simdNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+ {
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
- regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
- regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs);
+ assert(tmpReg1 != REG_NA);
+ assert(tmpReg1 != targetReg);
+ }
+ else
+ {
+ assert(simdNode->gtRsvdRegs == RBM_NONE);
+ }
+ }
+ else
+ {
+ assert(baseType == TYP_INT);
+ assert(iset >= InstructionSet_SSE3_4);
- // Choose any register different from targetReg as tmpReg
- if (tmpReg1 != targetReg)
+ if (iset == InstructionSet_SSE3_4)
{
- tmpReg = tmpReg1;
+ // Must have reserved 1 scratch register.
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs);
}
else
{
- assert(targetReg != tmpReg2);
- tmpReg = tmpReg2;
+ // Must have reserved 2 scratch registers.
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+ regMaskTP tmpRegMask = genFindLowestBit(simdNode->gtRsvdRegs);
+ tmpReg1 = genRegNumFromMask(tmpRegMask);
+ tmpReg2 = genRegNumFromMask(simdNode->gtRsvdRegs & ~tmpRegMask);
}
- assert(tmpReg != REG_NA);
- assert(tmpReg != targetReg);
}
- if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ if (iset == InstructionSet_SSE2)
{
// We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
if (op1Reg == targetReg)
@@ -1323,96 +1358,187 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
}
// DotProduct(v1, v2)
- // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg
- if (baseType == TYP_FLOAT)
+ // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg1
+ if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0)
+ {
+ assert(baseType == TYP_FLOAT);
+ // v0 = v1 * v2
+ // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
+ // // position
+ // tmp = shuffle(tmp, tmp, SHUFFLE_ZXXY) // tmp = (2, 0, 0, 1) - don't really care what's in upper
+ // // bits
+ // v0 = v0 + tmp // v0 = (3+2, 0+2, 1+0, 0+1)
+ // tmp = shuffle(tmp, tmp, SHUFFLE_XXWW) // tmp = ( 1, 1, 2, 2)
+ // v0 = v0 + tmp // v0 = (1+2+3, 0+1+2, 0+1+2, 0+1+2)
+ //
+ inst_RV_RV(INS_mulps, targetReg, op2Reg);
+ inst_RV_RV(INS_movaps, tmpReg1, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZXXY);
+ inst_RV_RV(INS_addps, targetReg, tmpReg1);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XXWW);
+ inst_RV_RV(INS_addps, targetReg, tmpReg1);
+ }
+ else if (baseType == TYP_FLOAT)
{
// v0 = v1 * v2
// tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
// // position
- // tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
+ // tmp = shuffle(tmp, tmp, SHUFFLE_ZWXY) // tmp = (2, 3, 0, 1)
// v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
// tmp = v0
- // tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
+ // tmp = shuffle(tmp, tmp, SHUFFLE_XYZW) // tmp = (0+1, 1+0, 2+3, 3+2)
// v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
// // Essentially horizontal addition of all elements.
// // We could achieve the same using the SSE3 instruction
// // HADDPS.
//
inst_RV_RV(INS_mulps, targetReg, op2Reg);
- inst_RV_RV(INS_movaps, tmpReg, targetReg);
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0xb1);
- inst_RV_RV(INS_addps, targetReg, tmpReg);
- inst_RV_RV(INS_movaps, tmpReg, targetReg);
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0x1b);
- inst_RV_RV(INS_addps, targetReg, tmpReg);
+ inst_RV_RV(INS_movaps, tmpReg1, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZWXY);
+ inst_RV_RV(INS_addps, targetReg, tmpReg1);
+ inst_RV_RV(INS_movaps, tmpReg1, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XYZW);
+ inst_RV_RV(INS_addps, targetReg, tmpReg1);
}
- else if (baseType == TYP_DOUBLE)
+ else
{
+ assert(baseType == TYP_DOUBLE);
+
// v0 = v1 * v2
// tmp = v0 // v0 = (1, 0) - each element is given by its position
// tmp = shuffle(tmp, tmp, Shuffle(0,1)) // tmp = (0, 1)
// v0 = v0 + tmp // v0 = (1+0, 0+1)
inst_RV_RV(INS_mulpd, targetReg, op2Reg);
- inst_RV_RV(INS_movaps, tmpReg, targetReg);
- inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg, tmpReg, 0x01);
- inst_RV_RV(INS_addpd, targetReg, tmpReg);
- }
- else
- {
- unreached();
+ inst_RV_RV(INS_movaps, tmpReg1, targetReg);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg1, tmpReg1, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg1);
}
}
else
{
- // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg.
- // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually
- // use the 3-op form, so that we can avoid these copies.
- // TODO-CQ: Add inst_RV_RV_RV_IV().
- if (op1Reg == targetReg)
- {
- // Best case
- // nothing to do, we have registers in the right place
- }
- else if (op2Reg == targetReg)
+ assert(iset >= InstructionSet_SSE3_4);
+
+ if (varTypeIsFloating(baseType))
{
- op2Reg = op1Reg;
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg.
+ // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually
+ // use the 3-op form, so that we can avoid these copies.
+ // TODO-CQ: Add inst_RV_RV_RV_IV().
+ if (op1Reg == targetReg)
+ {
+ // Best case
+ // nothing to do, we have registers in the right place
+ }
+ else if (op2Reg == targetReg)
+ {
+ op2Reg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
+ }
+
+ emitAttr emitSize = emitActualTypeSize(simdEvalType);
+ if (baseType == TYP_FLOAT)
+ {
+ // dpps computes the dot product of the upper & lower halves of the 32-byte register.
+ // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ unsigned mask = ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) ? 0x71 : 0xf1;
+ inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, mask);
+ // If this is TYP_SIMD32, we need to combine the lower & upper results.
+ if (simdEvalType == TYP_SIMD32)
+ {
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg1, targetReg, 0x01);
+ inst_RV_RV(INS_addps, targetReg, tmpReg1, targetType, emitTypeSize(targetType));
+ }
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ if (simdEvalType == TYP_SIMD32)
+ {
+ // targetReg = targetReg * op2Reg
+ // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves
+ // tmpReg1 = vextractf128(targetReg, 1) ; Moves the upper sum into tmpReg1
+ // targetReg = targetReg + tmpReg1
+ inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType));
+ inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType));
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg1, targetReg, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg1, targetType, emitTypeSize(targetType));
+ }
+ else
+ {
+ // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use
+ // dppd directly.
+ assert(iset == InstructionSet_SSE3_4);
+ inst_RV_RV_IV(INS_dppd, emitSize, targetReg, op2Reg, 0x31);
+ }
+ }
}
else
{
- inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
- }
+ // Dot product of a 16-byte int vector on SSE4 or a 32-byte int vector on AVX.
+ assert(baseType == TYP_INT);
+ assert(simdEvalType == TYP_SIMD16 || simdEvalType == TYP_SIMD32);
+
+#ifdef DEBUG
+ // SSE4: We need 1 scratch register.
+ // AVX2: We need 2 scratch registers.
+ if (simdEvalType == TYP_SIMD16)
+ {
+ assert(tmpReg1 != REG_NA);
+ }
+ else
+ {
+ assert(tmpReg1 != REG_NA);
+ assert(tmpReg2 != REG_NA);
+ }
+#endif
+
+ // tmpReg1 = op1 * op2
+ if (iset == InstructionSet_AVX)
+ {
+ // On AVX, take advantage of the 3-operand form of pmulld
+ inst_RV_RV_RV(INS_pmulld, tmpReg1, op1Reg, op2Reg, emitTypeSize(simdEvalType));
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdEvalType), tmpReg1, op1Reg, simdEvalType);
+ inst_RV_RV(INS_pmulld, tmpReg1, op2Reg, simdEvalType);
+ }
- emitAttr emitSize = emitActualTypeSize(simdEvalType);
- if (baseType == TYP_FLOAT)
- {
- // dpps computes the dot product of the upper & lower halves of the 32-byte register.
- // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
- inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, 0xf1);
- // If this is TYP_SIMD32, we need to combine the lower & upper results.
if (simdEvalType == TYP_SIMD32)
{
- getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
- inst_RV_RV(INS_addps, targetReg, tmpReg, targetType, emitTypeSize(targetType));
+ // tmpReg2[127..0] = Upper 128-bits of tmpReg1
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01);
+
+ // tmpReg1[127..0] = tmpReg1[127..0] + tmpReg2[127..0]
+ // This will compute
+ // tmpReg1[0] = op1[0]*op2[0] + op1[4]*op2[4]
+ // tmpReg1[1] = op1[1]*op2[1] + op1[5]*op2[5]
+ // tmpReg1[2] = op1[2]*op2[2] + op1[6]*op2[6]
+ // tmpReg1[3] = op1[3]*op2[3] + op1[7]*op2[7]
+ inst_RV_RV(INS_paddd, tmpReg1, tmpReg2, TYP_SIMD16, EA_16BYTE);
}
- }
- else if (baseType == TYP_DOUBLE)
- {
- // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use
- // dppd directly.
- assert(simdType == TYP_SIMD32);
-
- // targetReg = targetReg * op2Reg
- // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves
- // tmpReg = vextractf128(targetReg, 1) ; Moves the upper sum into tempReg
- // targetReg = targetReg + tmpReg
- inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType));
- inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType));
- getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
- inst_RV_RV(INS_addpd, targetReg, tmpReg, targetType, emitTypeSize(targetType));
- }
- else
- {
- unreached();
+
+ // This horizontal add will compute
+ //
+ // TYP_SIMD16:
+ // tmpReg1[0] = tmpReg1[2] = op1[0]*op2[0] + op1[1]*op2[1]
+ // tmpReg1[1] = tmpReg1[3] = op1[2]*op2[2] + op1[3]*op2[3]
+ //
+ // TYP_SIMD32:
+ // tmpReg1[0] = tmpReg1[2] = op1[0]*op2[0] + op1[4]*op2[4] + op1[1]*op2[1] + op1[5]*op2[5]
+ // tmpReg1[1] = tmpReg1[3] = op1[2]*op2[2] + op1[6]*op2[6] + op1[3]*op2[3] + op1[7]*op2[7]
+ inst_RV_RV(INS_phaddd, tmpReg1, tmpReg1, TYP_SIMD16, EA_16BYTE);
+
+ // DotProduct(op1, op2) = tmpReg1[0] = tmpReg1[0] + tmpReg1[1]
+ inst_RV_RV(INS_phaddd, tmpReg1, tmpReg1, TYP_SIMD16, EA_16BYTE);
+
+ // TargetReg = integer result from tmpReg1
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position)
+ inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT);
}
}
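For reference (not part of the diff), the pmulld/phaddd/phaddd reduction documented in the comments above corresponds to this standalone SSE4.1 sketch; the function name and driver values are illustrative only, and the 32-byte AVX variant (with the extra vextractf128/paddd step) is omitted.

#include <smmintrin.h> // SSE4.1 _mm_mullo_epi32; SSSE3 _mm_hadd_epi32 is pulled in too
#include <cstdio>

int DotProductInt4(__m128i v1, __m128i v2)
{
    __m128i t = _mm_mullo_epi32(v1, v2); // t[i] = v1[i] * v2[i]            (pmulld)
    t = _mm_hadd_epi32(t, t);            // t[0] = t0 + t1, t[1] = t2 + t3  (phaddd)
    t = _mm_hadd_epi32(t, t);            // t[0] = t0 + t1 + t2 + t3        (phaddd)
    return _mm_cvtsi128_si32(t);         // move the low dword to an int register
}

int main()
{
    __m128i a = _mm_setr_epi32(1, 2, 3, 4);
    __m128i b = _mm_setr_epi32(5, 6, 7, 8);
    printf("%d\n", DotProductInt4(a, b)); // prints: 70
}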
@@ -1456,6 +1582,59 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
genConsumeOperands(simdNode);
regNumber srcReg = op1->gtRegNum;
+ // Optimize the case of op1 is in memory and trying to access ith element.
+ if (op1->isMemoryOp())
+ {
+ assert(op1->isContained());
+
+ regNumber baseReg;
+ regNumber indexReg;
+ int offset = 0;
+
+ if (op1->OperGet() == GT_LCL_FLD)
+ {
+ // There are three parts to the total offset here:
+ // {offset of local} + {offset of SIMD Vector field} + {offset of element within SIMD vector}.
+ bool isEBPbased;
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ offset += compiler->lvaFrameAddress(varNum, &isEBPbased);
+ offset += op1->gtLclFld.gtLclOffs;
+
+ baseReg = (isEBPbased) ? REG_EBP : REG_ESP;
+ }
+ else
+ {
+ // Require GT_IND addr to be not contained.
+ assert(op1->OperGet() == GT_IND);
+
+ GenTree* addr = op1->AsIndir()->Addr();
+ assert(!addr->isContained());
+ baseReg = addr->gtRegNum;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ indexReg = REG_NA;
+ offset += (int)op2->AsIntConCommon()->IconValue() * genTypeSize(baseType);
+ }
+ else
+ {
+ indexReg = op2->gtRegNum;
+ assert(genIsValidIntReg(indexReg));
+ }
+
+ // Now, load the desired element.
+ getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load
+ emitTypeSize(baseType), // Of the vector baseType
+ targetReg, // To targetReg
+ baseReg, // Base Reg
+ indexReg, // Indexed
+ genTypeSize(baseType), // by the size of the baseType
+ offset);
+ genProduceReg(simdNode);
+ return;
+ }
+
// SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant.
// For the non-constant case, we will use the SIMD temp location to store the vector, and
// then load the desired element.
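In scalar terms (illustrative only, not part of the diff), the contained-memory fast path added above is just an indexed load from base + offset + index * element size; the helper below assumes a float base type.

#include <cstdio>

float GetItem(const float* vectorInMemory, int index)
{
    // Emitted as a single load with a [base + index*4 + offset] address mode.
    return vectorInMemory[index];
}

int main()
{
    float v[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    printf("%g\n", GetItem(v, 2)); // prints: 3
}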
@@ -1839,26 +2018,9 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
// Need an additional Xmm register to read upper 4 bytes, which is different from targetReg
assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 2);
-
- regNumber tmpReg = REG_NA;
- regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
- regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
- // Choose any register different from targetReg as tmpReg
- if (tmpReg1 != targetReg)
- {
- tmpReg = tmpReg1;
- }
- else
- {
- assert(targetReg != tmpReg2);
- tmpReg = tmpReg2;
- }
- assert(tmpReg != REG_NA);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
assert(tmpReg != targetReg);
// Load upper 4 bytes in tmpReg
@@ -1868,7 +2030,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0);
// combine upper 4 bytes and lower 8 bytes in targetReg
- getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX);
genProduceReg(treeNode);
}
@@ -1912,9 +2074,9 @@ void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
}
//-----------------------------------------------------------------------------
-// genLoadLclFldTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type field.
-// Since Vector3 is not a hardware supported write size, it is performed
-// as two reads: 8 byte followed by 4-byte.
+// genLoadLclTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type local variable or field.
+// Since Vector3 is not a hardware supported read size, it is performed
+// as two reads: 4 byte followed by 8 byte.
//
// Arguments:
// treeNode - tree node that is attempting to load TYP_SIMD12 field
@@ -1922,37 +2084,26 @@ void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
// Return Value:
// None.
//
-void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
+void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode)
{
- assert(treeNode->OperGet() == GT_LCL_FLD);
+ assert((treeNode->OperGet() == GT_LCL_FLD) || (treeNode->OperGet() == GT_LCL_VAR));
regNumber targetReg = treeNode->gtRegNum;
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned offs = 0;
unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
assert(varNum < compiler->lvaCount);
- // Need an addtional Xmm register to read upper 4 bytes
- assert(treeNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(treeNode->gtRsvdRegs) == 2);
-
- regNumber tmpReg = REG_NA;
- regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
- regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
- tmpRegsMask &= ~tmpReg1Mask;
- regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
- regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
-
- // Choose any register different from targetReg as tmpReg
- if (tmpReg1 != targetReg)
+ if (treeNode->OperGet() == GT_LCL_FLD)
{
- tmpReg = tmpReg1;
+ offs = treeNode->gtLclFld.gtLclOffs;
}
- else
- {
- assert(targetReg != tmpReg2);
- tmpReg = tmpReg2;
- }
- assert(tmpReg != REG_NA);
+
+ // Need an additional Xmm register that is different from
+ // targetReg to read upper 4 bytes.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
assert(tmpReg != targetReg);
// Read upper 4 bytes to tmpReg
@@ -1962,11 +2113,54 @@ void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs);
// combine upper 4 bytes and lower 8 bytes in targetReg
- getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX);
genProduceReg(treeNode);
}
+#ifdef _TARGET_X86_
+
+//-----------------------------------------------------------------------------
+// genPutArgStkSIMD12: store a TYP_SIMD12 (i.e. Vector3) argument to the outgoing stack area.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two stores: 8 byte followed by 4-byte.
+//
+// Arguments:
+// treeNode - tree node that is attempting to store TYP_SIMD12 field
+//
+// Return Value:
+// None.
+//
+void CodeGen::genPutArgStkSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+
+ // Need an additional Xmm register to extract upper 4 bytes from data.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // Subtract from ESP; create space for argument.
+ // TODO-CQ: use 'push' instead?
+ inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
+ genStackLevel += 12;
+
+ // 8-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, REG_SPBASE, 0);
+
+ // Extract upper 4-bytes from data
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+ // 4-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, REG_SPBASE, 8);
+}
+
+#endif // _TARGET_X86_
+
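A scalar sketch (illustrative, not part of the diff) of the 12-byte split that genPutArgStkSIMD12 emits: one 8-byte write of x/y followed by a 4-byte write of z, the element that pshufd with 0x02 moves into the low lane.

#include <cstdint>
#include <cstdio>
#include <cstring>

struct Vector3 { float x, y, z; };

void StoreVector3Arg(uint8_t* stackSlot, const Vector3& v)
{
    std::memcpy(stackSlot, &v.x, 8);     // 8-byte write: x and y
    std::memcpy(stackSlot + 8, &v.z, 4); // 4-byte write: z
}

int main()
{
    uint8_t slot[12];
    StoreVector3Arg(slot, {1.0f, 2.0f, 3.0f});
    float back[3];
    std::memcpy(back, slot, sizeof(slot));
    printf("%g %g %g\n", back[0], back[1], back[2]); // prints: 1 2 3
}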
//-----------------------------------------------------------------------------
// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD32 vector to
// the given register, if any, or to memory.
@@ -2139,5 +2333,5 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
}
#endif // FEATURE_SIMD
-#endif //_TARGET_AMD64_
+#endif //_TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
index a44fb9d0a1..c81f7b4bf0 100644
--- a/src/jit/simdintrinsiclist.h
+++ b/src/jit/simdintrinsiclist.h
@@ -20,7 +20,7 @@
e) TODO-Cleanup: when we plumb TYP_SIMD through front-end, replace TYP_STRUCT with TYP_SIMD.
*/
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// Max number of parameters that we model in the table for SIMD intrinsic methods.
#define SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT 3
@@ -111,7 +111,8 @@ SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr,
SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
// Dot Product
-SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+// For Vector<int>, this is supported only on AVX.
+SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
// Select
SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
@@ -137,9 +138,9 @@ SIMD_INTRINSIC("UpperRestore", false, UpperRestore,
SIMD_INTRINSIC(nullptr, false, Invalid, "Invalid", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
#undef SIMD_INTRINSIC
-#else //_TARGET_AMD64_
+#else //_TARGET_XARCH_
#error SIMD intrinsics not defined for target arch
-#endif //!_TARGET_AMD64_
+#endif //!_TARGET_XARCH_
#endif //FEATURE_SIMD
// clang-format on
diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp
index 2da6902464..f0ee461c45 100644
--- a/src/jit/ssabuilder.cpp
+++ b/src/jit/ssabuilder.cpp
@@ -27,87 +27,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
namespace
{
/**
- * Visits basic blocks in the depth first order and arranges them in the order of
- * their DFS finish time.
- *
- * @param block The fgFirstBB or entry block.
- * @param comp A pointer to compiler.
- * @param visited In pointer initialized to false and of size at least fgMaxBBNum.
- * @param count Out pointer for count of all nodes reachable by DFS.
- * @param postOrder Out poitner to arrange the blocks and of size at least fgMaxBBNum.
- */
-static void TopologicalSortHelper(BasicBlock* block, Compiler* comp, bool* visited, int* count, BasicBlock** postOrder)
-{
- visited[block->bbNum] = true;
-
- ArrayStack<BasicBlock*> blocks(comp);
- ArrayStack<AllSuccessorIter> iterators(comp);
- ArrayStack<AllSuccessorIter> ends(comp);
-
- // there are three stacks used here and all should be same height
- // the first is for blocks
- // the second is the iterator to keep track of what succ of the block we are looking at
- // and the third is the end marker iterator
- blocks.Push(block);
- iterators.Push(block->GetAllSuccs(comp).begin());
- ends.Push(block->GetAllSuccs(comp).end());
-
- while (blocks.Height() > 0)
- {
- block = blocks.Top();
-
-#ifdef DEBUG
- if (comp->verboseSsa)
- {
- printf("[SsaBuilder::TopologicalSortHelper] Visiting BB%02u: ", block->bbNum);
- printf("[");
- unsigned numSucc = block->NumSucc(comp);
- for (unsigned i = 0; i < numSucc; ++i)
- {
- printf("BB%02u, ", block->GetSucc(i, comp)->bbNum);
- }
- EHSuccessorIter end = block->GetEHSuccs(comp).end();
- for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi)
- {
- printf("[EH]BB%02u, ", (*ehsi)->bbNum);
- }
- printf("]\n");
- }
-#endif
-
- if (iterators.TopRef() != ends.TopRef())
- {
- // if the block on TOS still has unreached successors, visit them
- AllSuccessorIter& iter = iterators.TopRef();
- BasicBlock* succ = *iter;
- ++iter;
- // push the child
-
- if (!visited[succ->bbNum])
- {
- blocks.Push(succ);
- iterators.Push(succ->GetAllSuccs(comp).begin());
- ends.Push(succ->GetAllSuccs(comp).end());
- visited[succ->bbNum] = true;
- }
- }
- else
- {
- // all successors have been visited
- blocks.Pop();
- iterators.Pop();
- ends.Pop();
-
- postOrder[*count] = block;
- block->bbPostOrderNum = *count;
- *count += 1;
-
- DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", *count, dspPtr(block), block->bbNum);
- }
- }
-}
-
-/**
* Method that finds a common IDom parent, much like least common ancestor.
*
* @param finger1 A basic block that might share IDom ancestor with finger2.
@@ -184,6 +103,8 @@ void Compiler::fgResetForSsa()
{
lvaTable[i].lvPerSsaData.Reset();
}
+ lvHeapPerSsaData.Reset();
+ m_heapSsaMap = nullptr;
for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
{
// Eliminate phis.
@@ -197,6 +118,32 @@ void Compiler::fgResetForSsa()
blk->bbTreeList->gtPrev = last;
}
}
+
+ // Clear post-order numbers and SSA numbers; SSA construction will overwrite these,
+ // but only for reachable code, so clear them to avoid analysis getting confused
+ // by stale annotations in unreachable code.
+ blk->bbPostOrderNum = 0;
+ for (GenTreeStmt* stmt = blk->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
+ {
+ if (tree->IsLocal())
+ {
+ tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM);
+ continue;
+ }
+
+ Compiler::IndirectAssignmentAnnotation* pIndirAssign = nullptr;
+ if ((tree->OperGet() != GT_ASG) || !GetIndirAssignMap()->Lookup(tree, &pIndirAssign) ||
+ (pIndirAssign == nullptr))
+ {
+ continue;
+ }
+
+ pIndirAssign->m_defSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ pIndirAssign->m_useSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ }
+ }
}
}
@@ -222,27 +169,97 @@ SsaBuilder::SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator)
{
}
-/**
- * Topologically sort the graph and return the number of nodes visited.
- *
- * @param postOrder The array in which the arranged basic blocks have to be returned.
- * @param count The size of the postOrder array.
- *
- * @return The number of nodes visited while performing DFS on the graph.
- */
+//------------------------------------------------------------------------
+// TopologicalSort: Topologically sort the graph and return the number of nodes visited.
+//
+// Arguments:
+// postOrder - The array in which the arranged basic blocks have to be returned.
+// count - The size of the postOrder array.
+//
+// Return Value:
+// The number of nodes visited while performing DFS on the graph.
+
int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count)
{
- // Allocate and initialize visited flags.
- bool* visited = (bool*)alloca(count * sizeof(bool));
- memset(visited, 0, count * sizeof(bool));
+ Compiler* comp = m_pCompiler;
+
+ BitVecTraits traits(comp->fgBBNumMax + 1, comp);
+ BitVec BITVEC_INIT_NOCOPY(visited, BitVecOps::MakeEmpty(&traits));
// Display basic blocks.
- DBEXEC(VERBOSE, m_pCompiler->fgDispBasicBlocks());
- DBEXEC(VERBOSE, m_pCompiler->fgDispHandlerTab());
+ DBEXEC(VERBOSE, comp->fgDispBasicBlocks());
+ DBEXEC(VERBOSE, comp->fgDispHandlerTab());
- // Call the recursive helper.
- int postIndex = 0;
- TopologicalSortHelper(m_pCompiler->fgFirstBB, m_pCompiler, visited, &postIndex, postOrder);
+ // Compute order.
+ int postIndex = 0;
+ BasicBlock* block = comp->fgFirstBB;
+ BitVecOps::AddElemD(&traits, visited, block->bbNum);
+
+ ArrayStack<BasicBlock*> blocks(comp);
+ ArrayStack<AllSuccessorIter> iterators(comp);
+ ArrayStack<AllSuccessorIter> ends(comp);
+
+ // there are three stacks used here and all should be same height
+ // the first is for blocks
+ // the second is the iterator to keep track of what succ of the block we are looking at
+ // and the third is the end marker iterator
+ blocks.Push(block);
+ iterators.Push(block->GetAllSuccs(comp).begin());
+ ends.Push(block->GetAllSuccs(comp).end());
+
+ while (blocks.Height() > 0)
+ {
+ block = blocks.Top();
+
+#ifdef DEBUG
+ if (comp->verboseSsa)
+ {
+ printf("[SsaBuilder::TopologicalSort] Visiting BB%02u: ", block->bbNum);
+ printf("[");
+ unsigned numSucc = block->NumSucc(comp);
+ for (unsigned i = 0; i < numSucc; ++i)
+ {
+ printf("BB%02u, ", block->GetSucc(i, comp)->bbNum);
+ }
+ EHSuccessorIter end = block->GetEHSuccs(comp).end();
+ for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi)
+ {
+ printf("[EH]BB%02u, ", (*ehsi)->bbNum);
+ }
+ printf("]\n");
+ }
+#endif
+
+ if (iterators.TopRef() != ends.TopRef())
+ {
+ // if the block on TOS still has unreached successors, visit them
+ AllSuccessorIter& iter = iterators.TopRef();
+ BasicBlock* succ = *iter;
+ ++iter;
+
+ // push the child
+ if (!BitVecOps::IsMember(&traits, visited, succ->bbNum))
+ {
+ blocks.Push(succ);
+ iterators.Push(succ->GetAllSuccs(comp).begin());
+ ends.Push(succ->GetAllSuccs(comp).end());
+ BitVecOps::AddElemD(&traits, visited, succ->bbNum);
+ }
+ }
+ else
+ {
+ // all successors have been visited
+ blocks.Pop();
+ iterators.Pop();
+ ends.Pop();
+
+ postOrder[postIndex] = block;
+ block->bbPostOrderNum = postIndex;
+ postIndex += 1;
+
+ DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", postIndex, dspPtr(block), block->bbNum);
+ }
+ }
// In the absence of EH (because catch/finally have no preds), this should be valid.
// assert(postIndex == (count - 1));
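For reference (not part of the diff), the same iterative post-order DFS can be sketched with standard containers standing in for ArrayStack and BitVec; the graph and names below are illustrative only.

#include <cstdio>
#include <stack>
#include <utility>
#include <vector>

// Returns the blocks reachable from 'entry' in DFS post order.
std::vector<int> PostOrder(const std::vector<std::vector<int>>& succs, int entry)
{
    std::vector<int>  order;
    std::vector<bool> visited(succs.size(), false);
    // Each frame pairs a block with the index of its next successor to visit,
    // mirroring the blocks/iterators/ends stacks used above.
    std::stack<std::pair<int, size_t>> work;

    visited[entry] = true;
    work.push({entry, 0});

    while (!work.empty())
    {
        std::pair<int, size_t>& top = work.top();
        if (top.second < succs[top.first].size())
        {
            int succ = succs[top.first][top.second++];
            if (!visited[succ])
            {
                visited[succ] = true;
                work.push({succ, 0});
            }
        }
        else
        {
            // All successors visited: emit the block in post order.
            order.push_back(top.first);
            work.pop();
        }
    }
    return order;
}

int main()
{
    // BB0 -> {BB1, BB2}, BB1 -> {BB2}, BB2 -> {}
    std::vector<std::vector<int>> succs = {{1, 2}, {2}, {}};
    for (int b : PostOrder(succs, 0))
    {
        printf("BB%02d ", b); // prints: BB02 BB01 BB00
    }
    printf("\n");
}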
@@ -1686,7 +1703,17 @@ void SsaBuilder::Build()
JITDUMP("[SsaBuilder] Max block count is %d.\n", blockCount);
// Allocate the postOrder array for the graph.
- BasicBlock** postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*));
+
+ BasicBlock** postOrder;
+
+ if (blockCount > DEFAULT_MIN_OPTS_BB_COUNT)
+ {
+ postOrder = new (m_pCompiler->getAllocator()) BasicBlock*[blockCount];
+ }
+ else
+ {
+ postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*));
+ }
// Topologically sort the graph.
int count = TopologicalSort(postOrder, blockCount);
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
index f975822740..43c463039e 100644
--- a/src/jit/stackfp.cpp
+++ b/src/jit/stackfp.cpp
@@ -1406,8 +1406,6 @@ void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree)
assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH));
#endif
-#ifdef DEBUGGING_SUPPORT
-
/* For non-debuggable code, every definition of a lcl-var has
* to be checked to see if we need to open a new scope for it.
*/
@@ -1416,7 +1414,6 @@ void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree)
{
siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs);
}
-#endif
}
assert(op2);
@@ -2827,7 +2824,7 @@ void CodeGen::genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, Basic
BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
{
// Fast paths where a transition block is not necessary
- if (pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87) || pState->IsEmpty())
+ if ((pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87)) || pState->IsEmpty())
{
return pTarget;
}
@@ -4143,8 +4140,26 @@ void Compiler::raEnregisterVarsPostPassStackFP()
{
raSetRegLclBirthDeath(tree, lastlife, false);
}
+
+ // Model implicit use (& hence last use) of frame list root at pinvokes.
+ if (tree->gtOper == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
+ {
+ LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (frameVarDsc->lvTracked && ((call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH) != 0))
+ {
+ // Frame var dies here
+ unsigned varIndex = frameVarDsc->lvVarIndex;
+ VarSetOps::RemoveElemD(this, lastlife, varIndex);
+ }
+ }
+ }
}
}
+
assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
}
compCurBB = NULL;
diff --git a/src/jit/standalone/CMakeLists.txt b/src/jit/standalone/CMakeLists.txt
index 2e6317098e..f20d3790c7 100644
--- a/src/jit/standalone/CMakeLists.txt
+++ b/src/jit/standalone/CMakeLists.txt
@@ -1,22 +1,27 @@
project(ryujit)
+
add_definitions(-DFEATURE_NO_HOST)
add_definitions(-DSELF_NO_HOST)
add_definitions(-DFEATURE_READYTORUN_COMPILER)
remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
-if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+if(CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DLEGACY_BACKEND)
endif()
-add_library_clr(${JIT_BASE_NAME}
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=clrjit.dll)
+endif(WIN32)
+
+add_library_clr(clrjit
SHARED
${SHARED_LIB_SOURCES}
)
-add_dependencies(${JIT_BASE_NAME} jit_exports)
+add_dependencies(clrjit jit_exports)
-set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
-set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+set_property(TARGET clrjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET clrjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
set(RYUJIT_LINK_LIBRARIES
utilcodestaticnohost
@@ -47,12 +52,12 @@ else()
)
endif(CLR_CMAKE_PLATFORM_UNIX)
-target_link_libraries(${JIT_BASE_NAME}
+target_link_libraries(clrjit
${RYUJIT_LINK_LIBRARIES}
)
# add the install targets
-install_clr(${JIT_BASE_NAME})
+install_clr(clrjit)
# Enable profile guided optimization
-add_pgo(${JIT_BASE_NAME})
+add_pgo(clrjit)
diff --git a/src/jit/target.h b/src/jit/target.h
index fa0b18af3e..a726525488 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -6,11 +6,6 @@
#ifndef _TARGET_H_
#define _TARGET_H_
-// Inform includers that we're in a context in which a target has been set.
-#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
-#define _TARGET_SET_
-#endif
-
// If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
#if defined(UNIX_AMD64_ABI)
#if !defined(_TARGET_AMD64_)
@@ -365,6 +360,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#endif // !LEGACY_BACKEND
+#ifdef FEATURE_SIMD
+ #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned
+#endif // FEATURE_SIMD
#define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
#define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args
@@ -585,7 +583,14 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_CALLEE_TRASH_NOGC RBM_EDX
#endif // NOGC_WRITE_BARRIERS
- // IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ // GenericPInvokeCalliHelper unmanaged target parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_EAX
+ #define RBM_PINVOKE_TARGET_PARAM RBM_EAX
+
+ // GenericPInvokeCalliHelper cookie parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_STK
+
+ // IL stub's secret parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM)
#define REG_SECRET_STUB_PARAM REG_EAX
#define RBM_SECRET_STUB_PARAM RBM_EAX
@@ -594,6 +599,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_VIRTUAL_STUB_PARAM RBM_EAX
#define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_EAX
+ // VSD target address register
+ #define REG_VIRTUAL_STUB_TARGET REG_EAX
+ #define RBM_VIRTUAL_STUB_TARGET RBM_EAX
+
// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
#define RBM_PINVOKE_FRAME RBM_EDI
@@ -670,6 +679,12 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1)
+ // The registers trashed by profiler enter/leave/tailcall hook
+ // See vm\i386\asmhelpers.asm for more details.
+ #define RBM_PROFILER_ENTER_TRASH RBM_NONE
+ #define RBM_PROFILER_LEAVE_TRASH RBM_NONE
+ #define RBM_PROFILER_TAILCALL_TRASH (RBM_ALLINT & ~RBM_ARG_REGS)
+
// What sort of reloc do we use for [disp32] address mode
#define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_HIGHLOW
@@ -968,7 +983,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_PINVOKE_TARGET_PARAM RBM_R10
#define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R10
- // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM)
#define REG_SECRET_STUB_PARAM REG_R10
#define RBM_SECRET_STUB_PARAM RBM_R10
@@ -1111,9 +1126,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#endif // !UNIX_AMD64_ABI
// The registers trashed by profiler enter/leave/tailcall hook
- // See vm\amd64\amshelpers.asm for more details.
- #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH
- #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+ // See vm\amd64\asmhelpers.asm for more details.
+ #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH
+ #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+ #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH
// The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper.
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -1339,7 +1355,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_PINVOKE_TARGET_PARAM RBM_R12
#define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R12
- // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM)
#define REG_SECRET_STUB_PARAM REG_R12
#define RBM_SECRET_STUB_PARAM RBM_R12
@@ -1447,6 +1463,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define JMP_DIST_SMALL_MAX_NEG (-2048)
#define JMP_DIST_SMALL_MAX_POS (+2046)
+ #define CALL_DIST_MAX_NEG (-16777216)
+ #define CALL_DIST_MAX_POS (+16777214)
+
#define JCC_DIST_SMALL_MAX_NEG (-256)
#define JCC_DIST_SMALL_MAX_POS (+254)
@@ -1617,7 +1636,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_PINVOKE_TARGET_PARAM RBM_R14
#define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R14
- // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM)
#define REG_SECRET_STUB_PARAM REG_R12
#define RBM_SECRET_STUB_PARAM RBM_R12
@@ -2277,6 +2296,9 @@ inline regNumber regNextOfType(regNumber reg, var_types type)
inline bool isRegPairType(int /* s/b "var_types" */ type)
{
+#if !CPU_LONG_USES_REGPAIR
+ return false;
+#else
#ifdef _TARGET_64BIT_
return false;
#elif CPU_HAS_FP_SUPPORT
@@ -2284,6 +2306,7 @@ inline bool isRegPairType(int /* s/b "var_types" */ type)
#else
return type == TYP_LONG || type == TYP_DOUBLE;
#endif
+#endif // CPU_LONG_USES_REGPAIR
}
inline bool isFloatRegType(int /* s/b "var_types" */ type)
diff --git a/src/jit/tinyarray.h b/src/jit/tinyarray.h
index 17d7e044b2..bee59bdb59 100644
--- a/src/jit/tinyarray.h
+++ b/src/jit/tinyarray.h
@@ -71,7 +71,7 @@ public:
// only use this for clearing it
void operator=(void* rhs)
{
- assert(rhs == NULL);
+ assert(rhs == nullptr);
data = 0;
}
};
diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp
index 89abdff2b3..14eba8cb50 100644
--- a/src/jit/unwindamd64.cpp
+++ b/src/jit/unwindamd64.cpp
@@ -481,6 +481,13 @@ void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset)
}
#ifdef UNIX_AMD64_ABI
+//------------------------------------------------------------------------
+// Compiler::unwindSetFrameRegCFI: Record a cfi info for a frame register set.
+//
+// Arguments:
+// reg - The register being set as the frame register.
+// offset - The offset from the current stack pointer that the frame pointer will point at.
+//
void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset)
{
assert(compGeneratingProlog);
@@ -492,7 +499,13 @@ void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset)
createCfiCode(func, cbProlog, CFI_DEF_CFA_REGISTER, mapRegNumToDwarfReg(reg));
if (offset != 0)
{
- createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, offset);
+ // before: cfa = rsp + old_cfa_offset;
+ // rbp = rsp + offset;
+ // after: cfa should be based on rbp, but points to the old address:
+ // rsp + old_cfa_offset == rbp + old_cfa_offset + adjust;
+ // adjust = -offset;
+ int adjust = -(int)offset;
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, adjust);
}
}
#endif // UNIX_AMD64_ABI
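A quick numeric check (illustrative, not part of the diff) of the adjustment above, using assumed values old_cfa_offset = 16 and offset = 8.

#include <cassert>
#include <cstdio>

int main()
{
    long rsp            = 0x1000;  // assumed stack pointer at this prolog point
    long old_cfa_offset = 16;      // assumed CFA offset before the frame reg is set
    long offset         = 8;       // rbp = rsp + offset
    long rbp            = rsp + offset;
    long adjust         = -offset; // what the CFI_ADJUST_CFA_OFFSET code records

    // The CFA address must not move when it is rebased from rsp onto rbp.
    assert(rsp + old_cfa_offset == rbp + old_cfa_offset + adjust);
    printf("cfa = %#lx\n", rsp + old_cfa_offset); // prints: cfa = 0x1010
}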
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
index 9934416412..3a45039aa7 100644
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -657,7 +657,7 @@ void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes
for (IL_OFFSET offs = 0; offs < codeSize;)
{
char prefix[100];
- sprintf(prefix, "IL_%04x ", offs);
+ sprintf_s(prefix, _countof(prefix), "IL_%04x ", offs);
unsigned codeBytesDumped = dumpSingleInstr(codeAddr, offs, prefix);
offs += codeBytesDumped;
}
@@ -665,11 +665,9 @@ void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes
/*****************************************************************************
*
- * Display a variable set (which may be a 32-bit or 64-bit number); only
- * one or two of these can be used at once.
+ * Display a variable set.
*/
-
-const char* genES2str(EXPSET_TP set)
+const char* genES2str(BitVecTraits* traits, EXPSET_TP set)
{
const int bufSize = 17;
static char num1[bufSize];
@@ -682,11 +680,7 @@ const char* genES2str(EXPSET_TP set)
nump = (nump == num1) ? num2 : num1;
-#if EXPSET_SZ == 32
- sprintf_s(temp, bufSize, "%08X", set);
-#else
- sprintf_s(temp, bufSize, "%08X%08X", (int)(set >> 32), (int)set);
-#endif
+ sprintf_s(temp, bufSize, "%s", BitVecOps::ToString(traits, set));
return temp;
}
@@ -876,7 +870,7 @@ void ConfigMethodRange::InitRanges(const wchar_t* rangeStr, unsigned capacity)
#endif // defined(DEBUG) || defined(INLINE_DATA)
-#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC
/*****************************************************************************
* Histogram class.
@@ -896,7 +890,10 @@ Histogram::Histogram(IAllocator* allocator, const unsigned* const sizeTable)
Histogram::~Histogram()
{
- m_allocator->Free(m_counts);
+ if (m_counts != nullptr)
+ {
+ m_allocator->Free(m_counts);
+ }
}
// We need to lazy allocate the histogram data so static `Histogram` variables don't try to
@@ -1414,6 +1411,9 @@ void HelperCallProperties::init()
case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
case CORINFO_HELP_READYTORUN_STATIC_BASE:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE:
+#endif // COR_JIT_EE_VERSION > 460
// These may invoke static class constructors
// These can throw InvalidProgram exception if the class can not be constructed
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index 5bc96ed4a9..f7cc0c9a23 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -76,7 +76,6 @@ ValueNumStore::ValueNumStore(Compiler* comp, IAllocator* alloc)
, m_VNFunc2Map(nullptr)
, m_VNFunc3Map(nullptr)
, m_VNFunc4Map(nullptr)
- , m_uPtrToLocNotAFieldCount(1)
{
// We have no current allocation chunks.
for (unsigned i = 0; i < TYP_COUNT; i++)
@@ -604,6 +603,7 @@ ValueNumStore::Chunk::Chunk(
switch (attribs)
{
case CEA_None:
+ case CEA_NotAField:
break; // Nothing to do.
case CEA_Const:
switch (typ)
@@ -911,6 +911,7 @@ class Object* ValueNumStore::s_specialRefConsts[] = {nullptr, nullptr, nullptr};
ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func)
{
assert(VNFuncArity(func) == 0);
+ assert(func != VNF_NotAField);
ValueNum res;
@@ -1029,9 +1030,9 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
{
if (typ != TYP_BYREF) // We don't want/need to optimize a zero byref
{
- genTreeOps oper = genTreeOps(func);
- ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below.
- switch (oper)
+ ValueNum resultVN = NoVN;
+ ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below.
+ switch (genTreeOps(func))
{
case GT_ADD:
// This identity does not apply for floating point (when x == -0.0)
@@ -1041,11 +1042,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg0VN == ZeroVN)
{
- return arg1VN;
+ resultVN = arg1VN;
}
else if (arg1VN == ZeroVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
}
break;
@@ -1055,7 +1056,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg1VN == ZeroVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
break;
@@ -1066,11 +1067,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
{
if (arg0VN == OneVN)
{
- return arg1VN;
+ resultVN = arg1VN;
}
else if (arg1VN == OneVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
}
@@ -1080,11 +1081,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg0VN == ZeroVN)
{
- return ZeroVN;
+ resultVN = ZeroVN;
}
else if (arg1VN == ZeroVN)
{
- return ZeroVN;
+ resultVN = ZeroVN;
}
}
break;
@@ -1097,7 +1098,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
{
if (arg1VN == OneVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
}
break;
@@ -1109,11 +1110,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg0VN == ZeroVN)
{
- return arg1VN;
+ resultVN = arg1VN;
}
else if (arg1VN == ZeroVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
break;
@@ -1122,11 +1123,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg0VN == ZeroVN)
{
- return ZeroVN;
+ resultVN = ZeroVN;
}
else if (arg1VN == ZeroVN)
{
- return ZeroVN;
+ resultVN = ZeroVN;
}
break;
@@ -1142,7 +1143,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
ZeroVN = VNZeroForType(typ);
if (arg1VN == ZeroVN)
{
- return arg0VN;
+ resultVN = arg0VN;
}
break;
@@ -1150,30 +1151,35 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
// (x == x) => true (unless x is NaN)
if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
{
- return VNOneForType(typ);
+ resultVN = VNOneForType(typ);
}
if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
(arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
{
- return VNZeroForType(typ);
+ resultVN = VNZeroForType(typ);
}
break;
case GT_NE:
// (x != x) => false (unless x is NaN)
if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
{
- return VNZeroForType(typ);
+ resultVN = VNZeroForType(typ);
}
if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
(arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
{
- return VNOneForType(typ);
+ resultVN = VNOneForType(typ);
}
break;
default:
break;
}
+
+ if ((resultVN != NoVN) && (TypeOfVN(resultVN) == typ))
+ {
+ return resultVN;
+ }
}
}
else // must be a VNF_ function
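The identities folded above (x + 0 == x, x * 1 == x, x & 0 == 0, and so on) follow the same shape as this minimal standalone sketch (illustrative only, not part of the diff), which likewise funnels every match through a single exit.

#include <cstdio>

// Returns true and writes the simplified value when an integer identity applies.
bool TrySimplify(char op, int x, int y, int* result)
{
    int  simplified = 0;
    bool matched    = false;
    switch (op)
    {
        case '+': if (y == 0) { simplified = x; matched = true; } break; // x + 0 == x
        case '*': if (y == 1) { simplified = x; matched = true; } break; // x * 1 == x
        case '&': if (y == 0) { simplified = 0; matched = true; } break; // x & 0 == 0
        default: break;
    }
    if (matched)
    {
        *result = simplified;
    }
    return matched;
}

int main()
{
    int r;
    printf("%d\n", TrySimplify('+', 42, 0, &r) ? r : -1); // prints: 42
}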
@@ -2072,10 +2078,11 @@ bool ValueNumStore::CanEvalForConstantArgs(VNFunc vnf)
case GT_MKREFANY: // We can't evaluate these.
case GT_RETFILT:
case GT_LIST:
+ case GT_FIELD_LIST:
case GT_ARR_LENGTH:
return false;
case GT_MULHI:
- // should be rare, not worth the complexity and risk of getting it wrong
+ assert(false && "Unexpected GT_MULHI node encountered before lowering");
return false;
default:
return true;
@@ -2545,6 +2552,11 @@ ValueNumPair ValueNumStore::VNPairApplySelectors(ValueNumPair map, FieldSeqNode*
return ValueNumPair(liberalVN, conservVN);
}
+bool ValueNumStore::IsVNNotAField(ValueNum vn)
+{
+ return m_chunks.GetNoExpand(GetChunkNum(vn))->m_attribs == CEA_NotAField;
+}
+
ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq)
{
if (fieldSeq == nullptr)
@@ -2553,7 +2565,11 @@ ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq)
}
else if (fieldSeq == FieldSeqStore::NotAField())
{
- return VNForNotAField();
+ // We always allocate a new, unique VN in this call.
+ Chunk* c = GetAllocChunk(TYP_REF, CEA_NotAField);
+ unsigned offsetWithinChunk = c->AllocVN();
+ ValueNum result = c->m_baseVN + offsetWithinChunk;
+ return result;
}
else
{
@@ -2585,22 +2601,22 @@ FieldSeqNode* ValueNumStore::FieldSeqVNToFieldSeq(ValueNum vn)
{
return nullptr;
}
- else if (vn == VNForNotAField())
+
+ assert(IsVNFunc(vn));
+
+ VNFuncApp funcApp;
+ GetVNFunc(vn, &funcApp);
+ if (funcApp.m_func == VNF_NotAField)
{
return FieldSeqStore::NotAField();
}
- else
- {
- assert(IsVNFunc(vn));
- VNFuncApp funcApp;
- GetVNFunc(vn, &funcApp);
- assert(funcApp.m_func == VNF_FieldSeq);
- ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]);
- FieldSeqNode* head =
- m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal));
- FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]);
- return m_pComp->GetFieldSeqStore()->Append(head, tail);
- }
+
+ assert(funcApp.m_func == VNF_FieldSeq);
+ const ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]);
+ FieldSeqNode* head =
+ m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal));
+ FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]);
+ return m_pComp->GetFieldSeqStore()->Append(head, tail);
}
ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2)
@@ -2609,40 +2625,31 @@ ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2)
{
return fsVN2;
}
- else if (fsVN1 == VNForNotAField() || fsVN2 == VNForNotAField())
- {
- return VNForNotAField();
- }
- else
- {
- assert(IsVNFunc(fsVN1));
- VNFuncApp funcApp1;
- GetVNFunc(fsVN1, &funcApp1);
- assert(funcApp1.m_func == VNF_FieldSeq);
- ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2);
- ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes);
-#ifdef DEBUG
- if (m_pComp->verbose)
- {
- printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
- vnDump(m_pComp, fieldSeqVN);
- printf("\n");
- }
-#endif
+ assert(IsVNFunc(fsVN1));
- return fieldSeqVN;
+ VNFuncApp funcApp1;
+ GetVNFunc(fsVN1, &funcApp1);
+
+ if ((funcApp1.m_func == VNF_NotAField) || IsVNNotAField(fsVN2))
+ {
+ return VNForFieldSeq(FieldSeqStore::NotAField());
}
-}
-ValueNum ValueNumStore::VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN)
-{
- if (fieldSeqVN == VNForNotAField())
+ assert(funcApp1.m_func == VNF_FieldSeq);
+ ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
{
- // To distinguish two different not a fields, append a unique value.
- return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(++m_uPtrToLocNotAFieldCount));
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
}
- return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(0));
+#endif
+
+ return fieldSeqVN;
}
ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB)
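In the rewritten FieldSeqVNAppend above, a field-sequence VN is a cons list of VNF_FieldSeq(fieldHandle, tail) applications, the null VN stands for the empty sequence, and a VNF_NotAField on either side makes the whole result "not a field". The absorbing behaviour can be modeled with ordinary pointers as in the sketch below; the FieldSeq struct and Append helper are invented for illustration and leak memory where the JIT would use a compiler arena.

    #include <cassert>
    #include <string>

    // A field sequence modeled as a cons list; nullptr is the empty sequence and
    // a dedicated sentinel node plays the role of VNF_NotAField.
    struct FieldSeq
    {
        std::string     field;
        const FieldSeq* tail;
    };

    static const FieldSeq NotAFieldNode{ "<not-a-field>", nullptr };

    static bool IsNotAField(const FieldSeq* s) { return s == &NotAFieldNode; }

    // Append two sequences. "Not a field" absorbs everything it touches.
    const FieldSeq* Append(const FieldSeq* a, const FieldSeq* b)
    {
        if (a == nullptr)
        {
            return b;
        }
        if (IsNotAField(a) || IsNotAField(b))
        {
            return &NotAFieldNode;
        }
        // Leaks in this sketch; a real implementation allocates from an arena.
        return new FieldSeq{ a->field, Append(a->tail, b) };
    }

    int main()
    {
        const FieldSeq y{ "y", nullptr };
        const FieldSeq x{ "x", &y }; // sequence x.y

        const FieldSeq* xy = Append(&x, nullptr);
        assert(xy->field == "x" && xy->tail->field == "y");

        assert(IsNotAField(Append(&x, &NotAFieldNode))); // absorption
        assert(IsNotAField(Append(&NotAFieldNode, &y)));
        return 0;
    }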
@@ -2650,7 +2657,7 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB)
if (opB->OperGet() == GT_CNS_INT)
{
FieldSeqNode* fldSeq = opB->gtIntCon.gtFieldSeq;
- if ((fldSeq != nullptr) && (fldSeq != FieldSeqStore::NotAField()))
+ if (fldSeq != nullptr)
{
return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq);
}
@@ -2660,8 +2667,9 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB)
ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq)
{
+ assert(fldSeq != nullptr);
+
ValueNum res = NoVN;
- assert(fldSeq != FieldSeqStore::NotAField());
ValueNum opAvnWx = opA->gtVNPair.GetLiberal();
assert(VNIsValid(opAvnWx));
@@ -2684,7 +2692,7 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq)
assert(GetVNFunc(VNNormVal(opA->GetVN(VNK_Conservative)), &consFuncApp) && consFuncApp.Equals(funcApp));
#endif
ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
- res = VNForPtrToLoc(TYP_BYREF, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN));
+ res = VNForFunc(TYP_BYREF, VNF_PtrToLoc, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN));
}
else if (funcApp.m_func == VNF_PtrToStatic)
{
@@ -2917,6 +2925,11 @@ ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree,
var_types ValueNumStore::TypeOfVN(ValueNum vn)
{
+ if (vn == NoVN)
+ {
+ return TYP_UNDEF;
+ }
+
Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
return c->m_typ;
}
@@ -2936,6 +2949,11 @@ var_types ValueNumStore::TypeOfVN(ValueNum vn)
BasicBlock::loopNumber ValueNumStore::LoopOfVN(ValueNum vn)
{
+ if (vn == NoVN)
+ {
+ return MAX_LOOP_NUM;
+ }
+
Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
return c->m_loopNum;
}
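TypeOfVN and LoopOfVN above (and GetVNFunc further down) now test for the NoVN sentinel before touching the owning chunk, answering TYP_UNDEF, MAX_LOOP_NUM, or false instead of indexing with an invalid value number. The shape of that guard, reduced to toy types chosen for this sketch:

    #include <cassert>
    #include <vector>

    using ValueNum = int;
    static const ValueNum NoVN = -1; // sentinel: "no value number"

    enum ToyType { TT_UNDEF, TT_INT, TT_REF };

    struct MiniStore
    {
        std::vector<ToyType> types; // type recorded per value number

        ToyType TypeOfVN(ValueNum vn) const
        {
            if (vn == NoVN)
            {
                // Callers may pass NoVN freely; answer with a harmless default
                // instead of indexing out of bounds.
                return TT_UNDEF;
            }
            return types[static_cast<size_t>(vn)];
        }
    };

    int main()
    {
        MiniStore vs;
        vs.types = { TT_INT, TT_REF };

        assert(vs.TypeOfVN(1) == TT_REF);
        assert(vs.TypeOfVN(NoVN) == TT_UNDEF); // sentinel handled, no crash
        return 0;
    }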
@@ -3388,6 +3406,7 @@ bool ValueNumStore::IsVNFunc(ValueNum vn)
Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
switch (c->m_attribs)
{
+ case CEA_NotAField:
case CEA_Func0:
case CEA_Func1:
case CEA_Func2:
@@ -3401,6 +3420,11 @@ bool ValueNumStore::IsVNFunc(ValueNum vn)
bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
unsigned offset = ChunkOffset(vn);
assert(offset < c->m_numUsed);
@@ -3415,8 +3439,8 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
funcApp->m_args[1] = farg4->m_arg1;
funcApp->m_args[2] = farg4->m_arg2;
funcApp->m_args[3] = farg4->m_arg3;
- }
return true;
+ }
case CEA_Func3:
{
VNDefFunc3Arg* farg3 = &reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offset];
@@ -3425,8 +3449,8 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
funcApp->m_args[0] = farg3->m_arg0;
funcApp->m_args[1] = farg3->m_arg1;
funcApp->m_args[2] = farg3->m_arg2;
- }
return true;
+ }
case CEA_Func2:
{
VNDefFunc2Arg* farg2 = &reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offset];
@@ -3434,23 +3458,29 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
funcApp->m_arity = 2;
funcApp->m_args[0] = farg2->m_arg0;
funcApp->m_args[1] = farg2->m_arg1;
- }
return true;
+ }
case CEA_Func1:
{
VNDefFunc1Arg* farg1 = &reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offset];
funcApp->m_func = farg1->m_func;
funcApp->m_arity = 1;
funcApp->m_args[0] = farg1->m_arg0;
- }
return true;
+ }
case CEA_Func0:
{
VNDefFunc0Arg* farg0 = &reinterpret_cast<VNDefFunc0Arg*>(c->m_defs)[offset];
funcApp->m_func = farg0->m_func;
funcApp->m_arity = 0;
+ return true;
}
+ case CEA_NotAField:
+ {
+ funcApp->m_func = VNF_NotAField;
+ funcApp->m_arity = 0;
return true;
+ }
default:
return false;
}
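The GetVNFunc hunks above do two things: they move each "return true;" inside the braces so it executes while the per-arity definition pointer is still in scope, and they add a CEA_NotAField case that synthesizes a nullary VNF_NotAField application with no stored record. A compact, hypothetical model of that dispatch (toy enums and record layouts, not the JIT's):

    #include <cassert>

    enum MiniAttrib { ATTR_Const, ATTR_NotAField, ATTR_Func1, ATTR_Func2 };
    enum MiniFunc   { FN_None, FN_NotAField, FN_Neg, FN_Add };

    struct Func1Def { MiniFunc func; int arg0; };
    struct Func2Def { MiniFunc func; int arg0; int arg1; };

    struct FuncApp
    {
        MiniFunc func    = FN_None;
        unsigned arity   = 0;
        int      args[2] = { 0, 0 };
    };

    bool GetFunc(MiniAttrib attrib, const void* def, FuncApp* out)
    {
        switch (attrib)
        {
            case ATTR_Func2:
            {
                const Func2Def* d = static_cast<const Func2Def*>(def);
                out->func    = d->func;
                out->arity   = 2;
                out->args[0] = d->arg0;
                out->args[1] = d->arg1;
                return true; // return while 'd' is still in scope
            }
            case ATTR_Func1:
            {
                const Func1Def* d = static_cast<const Func1Def*>(def);
                out->func    = d->func;
                out->arity   = 1;
                out->args[0] = d->arg0;
                return true;
            }
            case ATTR_NotAField:
            {
                out->func  = FN_NotAField; // no stored record; synthesize arity 0
                out->arity = 0;
                return true;
            }
            default:
                return false; // constants etc. are not function applications
        }
    }

    int main()
    {
        Func2Def add{ FN_Add, 3, 4 };
        FuncApp  app;
        assert(GetFunc(ATTR_Func2, &add, &app) && app.arity == 2 && app.args[1] == 4);
        assert(GetFunc(ATTR_NotAField, nullptr, &app) && app.func == FN_NotAField);
        assert(!GetFunc(ATTR_Const, nullptr, &app));
        return 0;
    }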
@@ -3751,8 +3781,9 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo
// These need special semantics:
GT_COMMA, // == second argument (but with exception(s) from first).
GT_ADDR, GT_ARR_BOUNDS_CHECK,
- GT_OBJ, // May reference heap memory.
- GT_BLK, // May reference heap memory.
+ GT_OBJ, // May reference heap memory.
+ GT_BLK, // May reference heap memory.
+ GT_INIT_VAL, // Not strictly a pass-through.
// These control-flow operations need no values.
GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE};
@@ -3842,10 +3873,9 @@ static const char* s_reservedNameArr[] = {
"$VN.No", // -1 NoVN
"$VN.Null", // 0 VNForNull()
"$VN.ZeroMap", // 1 VNForZeroMap()
- "$VN.NotAField", // 2 VNForNotAField()
- "$VN.ReadOnlyHeap", // 3 VNForROH()
- "$VN.Void", // 4 VNForVoid()
- "$VN.EmptyExcSet" // 5 VNForEmptyExcSet()
+ "$VN.ReadOnlyHeap", // 2 VNForROH()
+ "$VN.Void", // 3 VNForVoid()
+ "$VN.EmptyExcSet" // 4 VNForEmptyExcSet()
};
// Returns the string name of "vn" when it is a reserved value number, nullptr otherwise
@@ -4804,8 +4834,16 @@ void Compiler::fgValueNumberTreeConst(GenTreePtr tree)
tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->gtDblCon.gtDconVal));
break;
case TYP_REF:
- // Null is the only constant. (Except maybe for String?)
- tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ if (tree->gtIntConCommon.IconValue() == 0)
+ {
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ }
+ else
+ {
+ assert(tree->gtFlags == GTF_ICON_STR_HDL); // Constant object can be only frozen string.
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag()));
+ }
break;
case TYP_BYREF:
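The TYP_REF hunk above stops treating every object-typed constant as null: a zero icon keeps the shared null VN, while a non-zero icon (a frozen string handle) gets a handle VN keyed by its bits, so identical handles share a value number. A toy version of that split is sketched below; MiniStore and its VNForHandle are assumptions for the example, not the real VNForHandle signature.

    #include <cassert>
    #include <cstdint>
    #include <map>

    using ValueNum = int;

    struct MiniStore
    {
        std::map<intptr_t, ValueNum> handleVNs; // one VN per distinct handle value
        ValueNum                     nextVN = 1;

        static ValueNum VNForNull() { return 0; }

        ValueNum VNForHandle(intptr_t handleBits)
        {
            auto it = handleVNs.find(handleBits);
            if (it != handleVNs.end())
            {
                return it->second; // same handle, same value number
            }
            ValueNum vn = nextVN++;
            handleVNs[handleBits] = vn;
            return vn;
        }

        // Value-number an object-typed integer constant: 0 is null, anything
        // else is assumed to be an embedded object handle (e.g. a frozen string).
        ValueNum VNForObjectConst(intptr_t iconValue)
        {
            return (iconValue == 0) ? VNForNull() : VNForHandle(iconValue);
        }
    };

    int main()
    {
        MiniStore vs;
        assert(vs.VNForObjectConst(0) == MiniStore::VNForNull());

        ValueNum s1 = vs.VNForObjectConst(0x1000);
        ValueNum s2 = vs.VNForObjectConst(0x1000);
        ValueNum s3 = vs.VNForObjectConst(0x2000);
        assert(s1 == s2 && s1 != s3); // equal handles share a VN, distinct ones do not
        return 0;
    }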
@@ -4903,9 +4941,6 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
- // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we
- // want to be able to give VN's to.
- tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
else
{
@@ -4913,6 +4948,9 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
// TODO-CQ: Why not be complete, and get this case right?
fgMutateHeap(tree DEBUGARG("INITBLK - non local"));
}
+ // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we
+ // want to be able to give VN's to.
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
}
else
{
@@ -4953,17 +4991,21 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
assert(lhs->OperGet() == GT_IND);
lhsAddr = lhs->gtOp.gtOp1;
}
+
// For addr-of-local expressions, lib/cons shouldn't matter.
assert(lhsAddr->gtVNPair.BothEqual());
ValueNum lhsAddrVN = lhsAddr->GetVN(VNK_Liberal);
// Unpack the PtrToLoc value number of the address.
assert(vnStore->IsVNFunc(lhsAddrVN));
+
VNFuncApp lhsAddrFuncApp;
vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp);
+
assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc);
assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) &&
vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum);
+
lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]);
}
@@ -5598,10 +5640,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// (we looked in a side table above for its "def" identity). Look up that value.
ValueNumPair oldLhsVNPair =
lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair;
- newLhsVNPair =
- vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq,
- rhsVNPair, // Pre-value.
- lvaGetActualType(lclFld->gtLclNum), compCurBB);
+ newLhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq,
+ rhsVNPair, // Pre-value.
+ lclFld->TypeGet(), compCurBB);
}
}
lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
@@ -6034,8 +6075,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (newVN == ValueNumStore::NoVN)
{
assert(arg->gtLclVarCommon.GetSsaNum() != ValueNumStore::NoVN);
- newVN = vnStore->VNForPtrToLoc(TYP_BYREF, vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()),
- vnStore->VNForFieldSeq(fieldSeq));
+ newVN = vnStore->VNForFunc(TYP_BYREF, VNF_PtrToLoc,
+ vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()),
+ vnStore->VNForFieldSeq(fieldSeq));
}
tree->gtVNPair.SetBoth(newVN);
}
@@ -6240,17 +6282,12 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
}
- else if (!varTypeIsStruct(tree) && vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) &&
- (funcApp.m_func == VNF_PtrToArrElem))
+ else if (vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) && (funcApp.m_func == VNF_PtrToArrElem))
{
- // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
- // excluded for now to minimize diffs.
fgValueNumberArrIndexVal(tree, &funcApp, addrXvnp.GetLiberal());
}
- else if (!varTypeIsStruct(tree) && addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2))
+ else if (addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2))
{
- // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
- // excluded for now to minimize diffs.
if (fldSeq2 == FieldSeqStore::NotAField())
{
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
@@ -6522,6 +6559,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
case GT_JTRUE:
case GT_LIST:
+#ifndef LEGACY_BACKEND
+ case GT_FIELD_LIST:
+#endif // !LEGACY_BACKEND
// These nodes never need to have a ValueNumber
tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
break;
@@ -6667,7 +6707,7 @@ void Compiler::fgValueNumberCastTree(GenTreePtr tree)
bool srcIsUnsigned = ((tree->gtFlags & GTF_UNSIGNED) != 0);
bool hasOverflowCheck = tree->gtOverflowEx();
- assert(genActualType(castToType) == tree->TypeGet()); // Insure that the resultType is correct
+ assert(genActualType(castToType) == genActualType(tree->TypeGet())); // Insure that the resultType is correct
tree->gtVNPair = vnStore->VNPairForCast(srcVNPair, castToType, castFromType, srcIsUnsigned, hasOverflowCheck);
}
@@ -6816,6 +6856,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
break;
case VNF_ReadyToRunStaticBase:
+ case VNF_ReadyToRunGenericStaticBase:
case VNF_ReadyToRunIsInstanceOf:
case VNF_ReadyToRunCastClass:
{
@@ -7061,11 +7102,11 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
vnf = VNFunc(GT_MOD);
break;
case CORINFO_HELP_ULDIV:
- vnf = VNFunc(GT_DIV);
- break; // Is this the right thing?
+ vnf = VNFunc(GT_UDIV);
+ break;
case CORINFO_HELP_ULMOD:
- vnf = VNFunc(GT_MOD);
- break; // Is this the right thing?
+ vnf = VNFunc(GT_UMOD);
+ break;
case CORINFO_HELP_LNG2DBL:
vnf = VNF_Lng2Dbl;
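The CORINFO_HELP_ULDIV / CORINFO_HELP_ULMOD hunk above corrects the mapping to GT_UDIV / GT_UMOD; value-numbering the unsigned helpers with the signed operators would fold to wrong constants whenever the dividend has its high bit set. A quick standalone check of the difference:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        // 0xFFFFFFFFFFFFFFFE is -2 when reinterpreted as a signed 64-bit value.
        uint64_t u = 0xFFFFFFFFFFFFFFFEull;
        int64_t  s = static_cast<int64_t>(u);

        // Unsigned division: a huge positive quotient.
        assert(u / 3 == 0x5555555555555554ull);
        assert(u % 3 == 2);

        // Signed division of the same bit pattern: a completely different answer.
        assert(s / 3 == 0);
        assert(s % 3 == -2);

        return 0;
    }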
@@ -7155,6 +7196,11 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
case CORINFO_HELP_READYTORUN_STATIC_BASE:
vnf = VNF_ReadyToRunStaticBase;
break;
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE:
+ vnf = VNF_ReadyToRunGenericStaticBase;
+ break;
+#endif // COR_JIT_EE_VERSION > 460
case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
vnf = VNF_GetsharedGcstaticBaseDynamicclass;
break;
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
index 17dacfbb54..c8a57ff210 100644
--- a/src/jit/valuenum.h
+++ b/src/jit/valuenum.h
@@ -297,13 +297,6 @@ public:
return ValueNum(SRC_ZeroMap);
}
- // The value number for the special "NotAField" field sequence.
- static ValueNum VNForNotAField()
- {
- // We reserve Chunk 0 for "special" VNs. Let SRC_NotAField (== 2) be the "not a field seq".
- return ValueNum(SRC_NotAField);
- }
-
// The ROH map is the map for the "read-only heap". We assume that this is never mutated, and always
// has the same value number.
static ValueNum VNForROH()
@@ -450,7 +443,7 @@ public:
// Get a new, unique value number for an expression that we're not equating to some function,
// which is the value of a tree in the given block.
- ValueNum VNForExpr(BasicBlock *block, var_types typ = TYP_UNKNOWN);
+ ValueNum VNForExpr(BasicBlock* block, var_types typ = TYP_UNKNOWN);
// This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
#define FEATURE_VN_TRACE_APPLY_SELECTORS 1
@@ -485,13 +478,11 @@ public:
ValueNumPair VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType);
- ValueNumPair VNPairApplySelectorsAssign(ValueNumPair map,
- FieldSeqNode* fieldSeq,
- ValueNumPair rhs,
- var_types indType,
- BasicBlock* block)
+ ValueNumPair VNPairApplySelectorsAssign(
+ ValueNumPair map, FieldSeqNode* fieldSeq, ValueNumPair rhs, var_types indType, BasicBlock* block)
{
- return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType, block),
+ return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType,
+ block),
VNApplySelectorsAssign(VNK_Conservative, map.GetConservative(), fieldSeq,
rhs.GetConservative(), indType, block));
}
@@ -506,6 +497,9 @@ public:
bool srcIsUnsigned = false,
bool hasOverflowCheck = false);
+ // Returns true iff the VN represents an application of VNF_NotAField.
+ bool IsVNNotAField(ValueNum vn);
+
// PtrToLoc values need to express a field sequence as one of their arguments. VN for null represents
// empty sequence, otherwise, "FieldSeq(VN(FieldHandle), restOfSeq)".
ValueNum VNForFieldSeq(FieldSeqNode* fieldSeq);
@@ -518,12 +512,6 @@ public:
// concatenation "fsVN1 || fsVN2".
ValueNum FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2);
- // Requires "lclVarVN" be a value number for a GT_LCL_VAR pointer tree.
- // Requires "fieldSeqVN" be a field sequence value number.
- // Requires "typ" to be a TYP_REF/TYP_BYREF used for VNF_PtrToLoc.
- // When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount.
- ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN);
-
// If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer
// with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB);
@@ -853,14 +841,15 @@ private:
DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE)
{
- CEA_None, // No extra attributes.
- CEA_Const, // This chunk contains constant values.
- CEA_Handle, // This chunk contains handle constants.
- CEA_Func0, // Represents functions of arity 0.
- CEA_Func1, // ...arity 1.
- CEA_Func2, // ...arity 2.
- CEA_Func3, // ...arity 3.
- CEA_Func4, // ...arity 4.
+ CEA_None, // No extra attributes.
+ CEA_Const, // This chunk contains constant values.
+ CEA_Handle, // This chunk contains handle constants.
+ CEA_NotAField, // This chunk contains "not a field" values.
+ CEA_Func0, // Represents functions of arity 0.
+ CEA_Func1, // ...arity 1.
+ CEA_Func2, // ...arity 2.
+ CEA_Func3, // ...arity 3.
+ CEA_Func4, // ...arity 4.
CEA_Count
}
END_DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE);
@@ -883,9 +872,14 @@ private:
ChunkExtraAttribs m_attribs;
BasicBlock::loopNumber m_loopNum;
- // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for allocations).
+ // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for
+ // allocations).
// (Increments "*baseVN" by ChunkSize.)
- Chunk(IAllocator* alloc, ValueNum* baseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum);
+ Chunk(IAllocator* alloc,
+ ValueNum* baseVN,
+ var_types typ,
+ ChunkExtraAttribs attribs,
+ BasicBlock::loopNumber loopNum);
// Requires that "m_numUsed < ChunkSize." Returns the offset of the allocated VN within the chunk; the
// actual VN is this added to the "m_baseVN" of the chunk.
@@ -1257,7 +1251,6 @@ private:
{
SRC_Null,
SRC_ZeroMap,
- SRC_NotAField,
SRC_ReadOnlyHeap,
SRC_Void,
SRC_EmptyExcSet,
@@ -1265,10 +1258,6 @@ private:
SRC_NumSpecialRefConsts
};
- // Counter to keep track of all the unique not a field sequences that have been assigned to
- // PtrToLoc, because the ptr was added to an offset that was not a field.
- unsigned m_uPtrToLocNotAFieldCount;
-
// The "values" of special ref consts will be all be "null" -- their differing meanings will
// be carried by the distinct value numbers.
static class Object* s_specialRefConsts[SRC_NumSpecialRefConsts];
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
index 064a33707b..eb17aedf28 100644
--- a/src/jit/valuenumfuncs.h
+++ b/src/jit/valuenumfuncs.h
@@ -11,9 +11,10 @@ ValueNumFuncDef(MapStore, 3, false, false, false)
ValueNumFuncDef(MapSelect, 2, false, false, false)
ValueNumFuncDef(FieldSeq, 2, false, false, false) // Sequence (VN of null == empty) of (VN's of) field handles.
+ValueNumFuncDef(NotAField, 0, false, false, false) // Value number function for FieldSeqStore::NotAField.
ValueNumFuncDef(ZeroMap, 0, false, false, false) // The "ZeroMap": indexing at any index yields "zero of the desired type".
-ValueNumFuncDef(PtrToLoc, 3, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq, 2: Unique value for this PtrToLoc.
+ValueNumFuncDef(PtrToLoc, 2, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq.
ValueNumFuncDef(PtrToArrElem, 4, false, false, false) // Pointer (byref) to an array element. Args: 0: array elem type eq class var_types value, VN's of: 1: array, 2: index, 3: FieldSeq.
ValueNumFuncDef(PtrToStatic, 1, false, false, false) // Pointer (byref) to a static variable (or possibly a field thereof, if the static variable is a struct). Args: 0: FieldSeq, first element
// of which is the static var.
@@ -99,6 +100,7 @@ ValueNumFuncDef(GetsharedNongcstaticBase, 2, false, true, true)
ValueNumFuncDef(GetsharedGcstaticBaseNoctor, 1, false, true, true)
ValueNumFuncDef(GetsharedNongcstaticBaseNoctor, 1, false, true, true)
ValueNumFuncDef(ReadyToRunStaticBase, 1, false, true, true)
+ValueNumFuncDef(ReadyToRunGenericStaticBase, 2, false, true, true)
ValueNumFuncDef(GetsharedGcstaticBaseDynamicclass, 2, false, true, true)
ValueNumFuncDef(GetsharedNongcstaticBaseDynamicclass, 2, false, true, true)
ValueNumFuncDef(GetgenericsGcthreadstaticBase, 1, false, true, true)
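valuenumfuncs.h defines every VN function through the ValueNumFuncDef X-macro, so the arity change for PtrToLoc (the third, uniquifying argument is no longer needed once NotAField VNs are unique by construction) is a one-line table edit. A minimal illustration of consuming such a table follows; it uses a cut-down, hypothetical two-column table rather than the real five-column one.

    #include <cstdio>

    // A cut-down X-macro table in the style of valuenumfuncs.h: name and arity.
    #define MINI_VN_FUNCS(def)                                                 \
        def(FieldSeq, 2)  /* (field handle VN, tail VN) */                     \
        def(NotAField, 0) /* nullary: every application is distinct */         \
        def(PtrToLoc, 2)  /* (local num VN, field sequence VN) */

    enum MiniVNFunc
    {
    #define DEF_ENUM(name, arity) VNF_##name,
        MINI_VN_FUNCS(DEF_ENUM)
    #undef DEF_ENUM
        VNF_Count
    };

    static const char* const s_names[] = {
    #define DEF_NAME(name, arity) #name,
        MINI_VN_FUNCS(DEF_NAME)
    #undef DEF_NAME
    };

    static const unsigned s_arities[] = {
    #define DEF_ARITY(name, arity) arity,
        MINI_VN_FUNCS(DEF_ARITY)
    #undef DEF_ARITY
    };

    int main()
    {
        for (int f = 0; f < VNF_Count; f++)
        {
            printf("VNF_%s takes %u argument(s)\n", s_names[f], s_arities[f]);
        }
        return 0;
    }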