| field | value |
|---|---|
| author | Jiyoung Yun <jy910.yun@samsung.com>, 2016-12-27 16:46:08 +0900 |
| committer | Jiyoung Yun <jy910.yun@samsung.com>, 2016-12-27 16:46:08 +0900 |
| commit | db20f3f1bb8595633a7e16c8900fd401a453a6b5 (patch) |
| tree | e5435159cd1bf0519276363a6fe1663d1721bed3 /src/jit |
| parent | 4b4aad7217d3292650e77eec2cf4c198ea9c3b4b (diff) |
Imported Upstream version 1.0.0.9127 (upstream/1.0.0.9127)
Diffstat (limited to 'src/jit')
102 files changed, 18309 insertions, 11988 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt index 6372e37852..96b8c496b9 100644 --- a/src/jit/CMakeLists.txt +++ b/src/jit/CMakeLists.txt @@ -7,9 +7,9 @@ include_directories("../inc") # Enable the following for UNIX altjit on Windows # add_definitions(-DALT_JIT) -if (CLR_CMAKE_TARGET_ARCH_AMD64) - add_definitions(-DFEATURE_SIMD) - add_definitions(-DFEATURE_AVX_SUPPORT) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_PLATFORM_UNIX)) + add_definitions(-DFEATURE_SIMD) + add_definitions(-DFEATURE_AVX_SUPPORT) endif () @@ -23,6 +23,7 @@ set( JIT_SOURCES bitset.cpp block.cpp codegencommon.cpp + codegenlinear.cpp compiler.cpp copyprop.cpp disasm.cpp @@ -194,19 +195,17 @@ endif() add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE}) -set(JIT_BASE_NAME clrjit) -if (CLR_BUILD_JIT32) - set(JIT_BASE_NAME ryujit) -endif() - -if(WIN32) - add_definitions(-DFX_VER_INTERNALNAME_STR=${JIT_BASE_NAME}.dll) -endif(WIN32) - add_subdirectory(dll) add_subdirectory(crossgen) add_subdirectory(standalone) -if (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM) +if (CLR_CMAKE_PLATFORM_ARCH_ARM) add_subdirectory(protojit) -endif (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM) +endif (CLR_CMAKE_PLATFORM_ARCH_ARM) + +if (CLR_CMAKE_PLATFORM_ARCH_I386) + add_subdirectory(legacyjit) + if (NOT CLR_BUILD_JIT32) + add_subdirectory(compatjit) + endif () +endif (CLR_CMAKE_PLATFORM_ARCH_I386) diff --git a/src/jit/ICorJitInfo_API_names.h b/src/jit/ICorJitInfo_API_names.h new file mode 100644 index 0000000000..601afbdfe1 --- /dev/null +++ b/src/jit/ICorJitInfo_API_names.h @@ -0,0 +1,171 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +DEF_CLR_API(getMethodAttribs) +DEF_CLR_API(setMethodAttribs) +DEF_CLR_API(getMethodSig) +DEF_CLR_API(getMethodInfo) +DEF_CLR_API(canInline) +DEF_CLR_API(reportInliningDecision) +DEF_CLR_API(canTailCall) +DEF_CLR_API(reportTailCallDecision) +DEF_CLR_API(getEHinfo) +DEF_CLR_API(getMethodClass) +DEF_CLR_API(getMethodModule) +DEF_CLR_API(getMethodVTableOffset) +DEF_CLR_API(getIntrinsicID) +DEF_CLR_API(isInSIMDModule) +DEF_CLR_API(getUnmanagedCallConv) +DEF_CLR_API(pInvokeMarshalingRequired) +DEF_CLR_API(satisfiesMethodConstraints) +DEF_CLR_API(isCompatibleDelegate) +DEF_CLR_API(isDelegateCreationAllowed) +DEF_CLR_API(isInstantiationOfVerifiedGeneric) +DEF_CLR_API(initConstraintsForVerification) +DEF_CLR_API(canSkipMethodVerification) +DEF_CLR_API(methodMustBeLoadedBeforeCodeIsRun) +DEF_CLR_API(mapMethodDeclToMethodImpl) +DEF_CLR_API(getGSCookie) +DEF_CLR_API(resolveToken) +DEF_CLR_API(tryResolveToken) +DEF_CLR_API(findSig) +DEF_CLR_API(findCallSiteSig) +DEF_CLR_API(getTokenTypeAsHandle) +DEF_CLR_API(canSkipVerification) +DEF_CLR_API(isValidToken) +DEF_CLR_API(isValidStringRef) +DEF_CLR_API(shouldEnforceCallvirtRestriction) +DEF_CLR_API(asCorInfoType) +DEF_CLR_API(getClassName) +DEF_CLR_API(appendClassName) +DEF_CLR_API(isValueClass) +DEF_CLR_API(canInlineTypeCheckWithObjectVTable) +DEF_CLR_API(getClassAttribs) +DEF_CLR_API(isStructRequiringStackAllocRetBuf) +DEF_CLR_API(getClassModule) +DEF_CLR_API(getModuleAssembly) +DEF_CLR_API(getAssemblyName) +DEF_CLR_API(LongLifetimeMalloc) +DEF_CLR_API(LongLifetimeFree) +DEF_CLR_API(getClassModuleIdForStatics) +DEF_CLR_API(getClassSize) +DEF_CLR_API(getClassAlignmentRequirement) +DEF_CLR_API(getClassGClayout) +DEF_CLR_API(getClassNumInstanceFields) +DEF_CLR_API(getFieldInClass) +DEF_CLR_API(checkMethodModifier) +DEF_CLR_API(getNewHelper) +DEF_CLR_API(getNewArrHelper) +DEF_CLR_API(getCastingHelper) +DEF_CLR_API(getSharedCCtorHelper) +DEF_CLR_API(getSecurityPrologHelper) +DEF_CLR_API(getTypeForBox) +DEF_CLR_API(getBoxHelper) +DEF_CLR_API(getUnBoxHelper) +DEF_CLR_API(getReadyToRunHelper) +DEF_CLR_API(getReadyToRunDelegateCtorHelper) +DEF_CLR_API(getHelperName) +DEF_CLR_API(initClass) +DEF_CLR_API(classMustBeLoadedBeforeCodeIsRun) +DEF_CLR_API(getBuiltinClass) +DEF_CLR_API(getTypeForPrimitiveValueClass) +DEF_CLR_API(canCast) +DEF_CLR_API(areTypesEquivalent) +DEF_CLR_API(mergeClasses) +DEF_CLR_API(getParentType) +DEF_CLR_API(getChildType) +DEF_CLR_API(satisfiesClassConstraints) +DEF_CLR_API(isSDArray) +DEF_CLR_API(getArrayRank) +DEF_CLR_API(getArrayInitializationData) +DEF_CLR_API(canAccessClass) +DEF_CLR_API(getFieldName) +DEF_CLR_API(getFieldClass) +DEF_CLR_API(getFieldType) +DEF_CLR_API(getFieldOffset) +DEF_CLR_API(isWriteBarrierHelperRequired) +DEF_CLR_API(getFieldInfo) +DEF_CLR_API(isFieldStatic) +DEF_CLR_API(getBoundaries) +DEF_CLR_API(setBoundaries) +DEF_CLR_API(getVars) +DEF_CLR_API(setVars) +DEF_CLR_API(allocateArray) +DEF_CLR_API(freeArray) +DEF_CLR_API(getArgNext) +DEF_CLR_API(getArgType) +DEF_CLR_API(getArgClass) +DEF_CLR_API(getHFAType) +DEF_CLR_API(GetErrorHRESULT) +DEF_CLR_API(GetErrorMessage) +DEF_CLR_API(FilterException) +DEF_CLR_API(HandleException) +DEF_CLR_API(ThrowExceptionForJitResult) +DEF_CLR_API(ThrowExceptionForHelper) +DEF_CLR_API(getEEInfo) +DEF_CLR_API(getJitTimeLogFilename) +DEF_CLR_API(getMethodDefFromMethod) +DEF_CLR_API(getMethodName) +DEF_CLR_API(getMethodHash) +DEF_CLR_API(findNameOfToken) +DEF_CLR_API(getSystemVAmd64PassStructInRegisterDescriptor) +DEF_CLR_API(getThreadTLSIndex) +DEF_CLR_API(getInlinedCallFrameVptr) 
+DEF_CLR_API(getAddrOfCaptureThreadGlobal) +DEF_CLR_API(getAddrModuleDomainID) +DEF_CLR_API(getHelperFtn) +DEF_CLR_API(getFunctionEntryPoint) +DEF_CLR_API(getFunctionFixedEntryPoint) +DEF_CLR_API(getMethodSync) +DEF_CLR_API(getLazyStringLiteralHelper) +DEF_CLR_API(embedModuleHandle) +DEF_CLR_API(embedClassHandle) +DEF_CLR_API(embedMethodHandle) +DEF_CLR_API(embedFieldHandle) +DEF_CLR_API(embedGenericHandle) +DEF_CLR_API(getLocationOfThisType) +DEF_CLR_API(getPInvokeUnmanagedTarget) +DEF_CLR_API(getAddressOfPInvokeFixup) +DEF_CLR_API(getAddressOfPInvokeTarget) +DEF_CLR_API(GetCookieForPInvokeCalliSig) +DEF_CLR_API(canGetCookieForPInvokeCalliSig) +DEF_CLR_API(getJustMyCodeHandle) +DEF_CLR_API(GetProfilingHandle) +DEF_CLR_API(getCallInfo) +DEF_CLR_API(canAccessFamily) +DEF_CLR_API(isRIDClassDomainID) +DEF_CLR_API(getClassDomainID) +DEF_CLR_API(getFieldAddress) +DEF_CLR_API(getVarArgsHandle) +DEF_CLR_API(canGetVarArgsHandle) +DEF_CLR_API(constructStringLiteral) +DEF_CLR_API(emptyStringLiteral) +DEF_CLR_API(getFieldThreadLocalStoreID) +DEF_CLR_API(setOverride) +DEF_CLR_API(addActiveDependency) +DEF_CLR_API(GetDelegateCtor) +DEF_CLR_API(MethodCompileComplete) +DEF_CLR_API(getTailCallCopyArgsThunk) +DEF_CLR_API(getJitFlags) +DEF_CLR_API(runWithErrorTrap) +DEF_CLR_API(getMemoryManager) +DEF_CLR_API(allocMem) +DEF_CLR_API(reserveUnwindInfo) +DEF_CLR_API(allocUnwindInfo) +DEF_CLR_API(allocGCInfo) +DEF_CLR_API(yieldExecution) +DEF_CLR_API(setEHcount) +DEF_CLR_API(setEHinfo) +DEF_CLR_API(logMsg) +DEF_CLR_API(doAssert) +DEF_CLR_API(reportFatalError) +DEF_CLR_API(allocBBProfileBuffer) +DEF_CLR_API(getBBProfileData) +DEF_CLR_API(recordCallSite) +DEF_CLR_API(recordRelocation) +DEF_CLR_API(getRelocTypeHint) +DEF_CLR_API(getModuleNativeEntryPointRange) +DEF_CLR_API(getExpectedTargetArchitecture) + +#undef DEF_CLR_API diff --git a/src/jit/ICorJitInfo_API_wrapper.hpp b/src/jit/ICorJitInfo_API_wrapper.hpp new file mode 100644 index 0000000000..4272b2755c --- /dev/null +++ b/src/jit/ICorJitInfo_API_wrapper.hpp @@ -0,0 +1,1666 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#define API_ENTER(name) wrapComp->CLR_API_Enter(API_##name); +#define API_LEAVE(name) wrapComp->CLR_API_Leave(API_##name); + +/**********************************************************************************/ +// clang-format off +/**********************************************************************************/ +// +// ICorMethodInfo +// + +DWORD WrapICorJitInfo::getMethodAttribs(CORINFO_METHOD_HANDLE ftn /* IN */) +{ + API_ENTER(getMethodAttribs) + DWORD temp = wrapHnd->getMethodAttribs(ftn); + API_LEAVE(getMethodAttribs) + return temp; +} + +void WrapICorJitInfo::setMethodAttribs(CORINFO_METHOD_HANDLE ftn,/* IN */ + CorInfoMethodRuntimeFlags attribs/* IN */) +{ + API_ENTER(setMethodAttribs); + wrapHnd->setMethodAttribs(ftn, attribs); + API_LEAVE(setMethodAttribs); +} + +void WrapICorJitInfo::getMethodSig(CORINFO_METHOD_HANDLE ftn, /* IN */ + CORINFO_SIG_INFO *sig, /* OUT */ + CORINFO_CLASS_HANDLE memberParent/* IN */) +{ + API_ENTER(getMethodSig); + wrapHnd->getMethodSig(ftn, sig, memberParent); + API_LEAVE(getMethodSig); +} + +bool WrapICorJitInfo::getMethodInfo( + CORINFO_METHOD_HANDLE ftn, /* IN */ + CORINFO_METHOD_INFO* info /* OUT */) +{ + API_ENTER(getMethodInfo); + bool temp = wrapHnd->getMethodInfo(ftn, info); + API_LEAVE(getMethodInfo); + return temp; +} + +CorInfoInline WrapICorJitInfo::canInline( + CORINFO_METHOD_HANDLE callerHnd, /* IN */ + CORINFO_METHOD_HANDLE calleeHnd, /* IN */ + DWORD* pRestrictions /* OUT */) +{ + API_ENTER(canInline); + CorInfoInline temp = wrapHnd->canInline(callerHnd, calleeHnd, pRestrictions); + API_LEAVE(canInline); + return temp; +} + +void WrapICorJitInfo::reportInliningDecision(CORINFO_METHOD_HANDLE inlinerHnd, + CORINFO_METHOD_HANDLE inlineeHnd, + CorInfoInline inlineResult, + const char * reason) +{ + API_ENTER(reportInliningDecision); + wrapHnd->reportInliningDecision(inlinerHnd, inlineeHnd, inlineResult, reason); + API_LEAVE(reportInliningDecision); +} + +bool WrapICorJitInfo::canTailCall( + CORINFO_METHOD_HANDLE callerHnd, /* IN */ + CORINFO_METHOD_HANDLE declaredCalleeHnd, /* IN */ + CORINFO_METHOD_HANDLE exactCalleeHnd, /* IN */ + bool fIsTailPrefix /* IN */) +{ + API_ENTER(canTailCall); + bool temp = wrapHnd->canTailCall(callerHnd, declaredCalleeHnd, exactCalleeHnd, fIsTailPrefix); + API_LEAVE(canTailCall); + return temp; +} + +void WrapICorJitInfo::reportTailCallDecision(CORINFO_METHOD_HANDLE callerHnd, + CORINFO_METHOD_HANDLE calleeHnd, + bool fIsTailPrefix, + CorInfoTailCall tailCallResult, + const char * reason) +{ + API_ENTER(reportTailCallDecision); + wrapHnd->reportTailCallDecision(callerHnd, calleeHnd, fIsTailPrefix, tailCallResult, reason); + API_LEAVE(reportTailCallDecision); +} + +void WrapICorJitInfo::getEHinfo( + CORINFO_METHOD_HANDLE ftn, /* IN */ + unsigned EHnumber, /* IN */ + CORINFO_EH_CLAUSE* clause /* OUT */) +{ + API_ENTER(getEHinfo); + wrapHnd->getEHinfo(ftn, EHnumber, clause); + API_LEAVE(getEHinfo); +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getMethodClass( + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(getMethodClass); + CORINFO_CLASS_HANDLE temp = wrapHnd->getMethodClass(method); + API_LEAVE(getMethodClass); + return temp; +} + +CORINFO_MODULE_HANDLE WrapICorJitInfo::getMethodModule( + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(getMethodModule); + CORINFO_MODULE_HANDLE temp = wrapHnd->getMethodModule(method); + API_LEAVE(getMethodModule); + return temp; +} + +void WrapICorJitInfo::getMethodVTableOffset( + CORINFO_METHOD_HANDLE method, /* IN */ + unsigned* offsetOfIndirection, /* OUT */ + unsigned* 
offsetAfterIndirection /* OUT */) +{ + API_ENTER(getMethodVTableOffset); + wrapHnd->getMethodVTableOffset(method, offsetOfIndirection, offsetAfterIndirection); + API_LEAVE(getMethodVTableOffset); +} + +#if COR_JIT_EE_VERSION > 460 + +CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID( + CORINFO_METHOD_HANDLE method, + bool* pMustExpand /* OUT */) +{ + API_ENTER(getIntrinsicID); + CorInfoIntrinsics temp = wrapHnd->getIntrinsicID(method, pMustExpand); + API_LEAVE(getIntrinsicID); + return temp; +} + +#else + +CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(CORINFO_METHOD_HANDLE method) +{ + API_ENTER(getIntrinsicID); + CorInfoIntrinsics temp = wrapHnd->getIntrinsicID(method); + API_LEAVE(getIntrinsicID); + return temp; +} + +#endif + +bool WrapICorJitInfo::isInSIMDModule(CORINFO_CLASS_HANDLE classHnd) +{ + API_ENTER(isInSIMDModule); + bool temp = wrapHnd->isInSIMDModule(classHnd); + API_LEAVE(isInSIMDModule); + return temp; +} + +CorInfoUnmanagedCallConv WrapICorJitInfo::getUnmanagedCallConv( + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(getUnmanagedCallConv); + CorInfoUnmanagedCallConv temp = wrapHnd->getUnmanagedCallConv(method); + API_LEAVE(getUnmanagedCallConv); + return temp; +} + +BOOL WrapICorJitInfo::pInvokeMarshalingRequired( + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* callSiteSig) +{ + API_ENTER(pInvokeMarshalingRequired); + BOOL temp = wrapHnd->pInvokeMarshalingRequired(method, callSiteSig); + API_LEAVE(pInvokeMarshalingRequired); + return temp; +} + +BOOL WrapICorJitInfo::satisfiesMethodConstraints( + CORINFO_CLASS_HANDLE parent, // the exact parent of the method + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(satisfiesMethodConstraints); + BOOL temp = wrapHnd->satisfiesMethodConstraints(parent, method); + API_LEAVE(satisfiesMethodConstraints); + return temp; +} + +BOOL WrapICorJitInfo::isCompatibleDelegate( + CORINFO_CLASS_HANDLE objCls, + CORINFO_CLASS_HANDLE methodParentCls, + CORINFO_METHOD_HANDLE method, + CORINFO_CLASS_HANDLE delegateCls, + BOOL *pfIsOpenDelegate) +{ + API_ENTER(isCompatibleDelegate); + BOOL temp = wrapHnd->isCompatibleDelegate(objCls, methodParentCls, method, delegateCls, pfIsOpenDelegate); + API_LEAVE(isCompatibleDelegate); + return temp; +} + +BOOL WrapICorJitInfo::isDelegateCreationAllowed( + CORINFO_CLASS_HANDLE delegateHnd, + CORINFO_METHOD_HANDLE calleeHnd) +{ + API_ENTER(isDelegateCreationAllowed); + BOOL temp = wrapHnd->isDelegateCreationAllowed(delegateHnd, calleeHnd); + API_LEAVE(isDelegateCreationAllowed); + return temp; +} + + +CorInfoInstantiationVerification WrapICorJitInfo::isInstantiationOfVerifiedGeneric( + CORINFO_METHOD_HANDLE method /* IN */) +{ + API_ENTER(isInstantiationOfVerifiedGeneric); + CorInfoInstantiationVerification temp = wrapHnd->isInstantiationOfVerifiedGeneric(method); + API_LEAVE(isInstantiationOfVerifiedGeneric); + return temp; +} + +void WrapICorJitInfo::initConstraintsForVerification( + CORINFO_METHOD_HANDLE method, /* IN */ + BOOL *pfHasCircularClassConstraints, /* OUT */ + BOOL *pfHasCircularMethodConstraint /* OUT */) +{ + API_ENTER(initConstraintsForVerification); + wrapHnd->initConstraintsForVerification(method, pfHasCircularClassConstraints, pfHasCircularMethodConstraint); + API_LEAVE(initConstraintsForVerification); +} + +CorInfoCanSkipVerificationResult WrapICorJitInfo::canSkipMethodVerification( + CORINFO_METHOD_HANDLE ftnHandle) +{ + API_ENTER(canSkipMethodVerification); + CorInfoCanSkipVerificationResult temp = wrapHnd->canSkipMethodVerification(ftnHandle); + API_LEAVE(canSkipMethodVerification); + 
return temp; +} + +void WrapICorJitInfo::methodMustBeLoadedBeforeCodeIsRun( + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(methodMustBeLoadedBeforeCodeIsRun); + wrapHnd->methodMustBeLoadedBeforeCodeIsRun(method); + API_LEAVE(methodMustBeLoadedBeforeCodeIsRun); +} + +CORINFO_METHOD_HANDLE WrapICorJitInfo::mapMethodDeclToMethodImpl( + CORINFO_METHOD_HANDLE method) +{ + API_ENTER(mapMethodDeclToMethodImpl); + CORINFO_METHOD_HANDLE temp = wrapHnd->mapMethodDeclToMethodImpl(method); + API_LEAVE(mapMethodDeclToMethodImpl); + return temp; +} + +void WrapICorJitInfo::getGSCookie( + GSCookie * pCookieVal, + GSCookie ** ppCookieVal ) +{ + API_ENTER(getGSCookie); + wrapHnd->getGSCookie(pCookieVal, ppCookieVal); + API_LEAVE(getGSCookie); +} + +/**********************************************************************************/ +// +// ICorModuleInfo +// +/**********************************************************************************/ + +void WrapICorJitInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken) +{ + API_ENTER(resolveToken); + wrapHnd->resolveToken(pResolvedToken); + API_LEAVE(resolveToken); +} + +#if COR_JIT_EE_VERSION > 460 + +bool WrapICorJitInfo::tryResolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken) +{ + API_ENTER(tryResolveToken); + bool success = wrapHnd->tryResolveToken(pResolvedToken); + API_LEAVE(tryResolveToken); + return success; +} + +#endif + +void WrapICorJitInfo::findSig( + CORINFO_MODULE_HANDLE module, + unsigned sigTOK, + CORINFO_CONTEXT_HANDLE context, + CORINFO_SIG_INFO *sig ) +{ + API_ENTER(findSig); + wrapHnd->findSig(module, sigTOK, context, sig); + API_LEAVE(findSig); +} + +void WrapICorJitInfo::findCallSiteSig( + CORINFO_MODULE_HANDLE module, /* IN */ + unsigned methTOK, /* IN */ + CORINFO_CONTEXT_HANDLE context, /* IN */ + CORINFO_SIG_INFO *sig /* OUT */) +{ + API_ENTER(findCallSiteSig); + wrapHnd->findCallSiteSig(module, methTOK, context, sig); + API_LEAVE(findCallSiteSig); +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getTokenTypeAsHandle( + CORINFO_RESOLVED_TOKEN * pResolvedToken /* IN */) +{ + API_ENTER(getTokenTypeAsHandle); + CORINFO_CLASS_HANDLE temp = wrapHnd->getTokenTypeAsHandle(pResolvedToken); + API_LEAVE(getTokenTypeAsHandle); + return temp; +} + +CorInfoCanSkipVerificationResult WrapICorJitInfo::canSkipVerification( + CORINFO_MODULE_HANDLE module /* IN */) +{ + API_ENTER(canSkipVerification); + CorInfoCanSkipVerificationResult temp = wrapHnd->canSkipVerification(module); + API_LEAVE(canSkipVerification); + return temp; +} + +BOOL WrapICorJitInfo::isValidToken( + CORINFO_MODULE_HANDLE module, /* IN */ + unsigned metaTOK /* IN */) +{ + API_ENTER(isValidToken); + BOOL result = wrapHnd->isValidToken(module, metaTOK); + API_LEAVE(isValidToken); + return result; +} + +BOOL WrapICorJitInfo::isValidStringRef( + CORINFO_MODULE_HANDLE module, /* IN */ + unsigned metaTOK /* IN */) +{ + API_ENTER(isValidStringRef); + BOOL temp = wrapHnd->isValidStringRef(module, metaTOK); + API_LEAVE(isValidStringRef); + return temp; +} + +BOOL WrapICorJitInfo::shouldEnforceCallvirtRestriction( + CORINFO_MODULE_HANDLE scope) +{ + API_ENTER(shouldEnforceCallvirtRestriction); + BOOL temp = wrapHnd->shouldEnforceCallvirtRestriction(scope); + API_LEAVE(shouldEnforceCallvirtRestriction); + return temp; +} + +/**********************************************************************************/ +// +// ICorClassInfo +// +/**********************************************************************************/ + +CorInfoType 
WrapICorJitInfo::asCorInfoType(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(asCorInfoType); + CorInfoType temp = wrapHnd->asCorInfoType(cls); + API_LEAVE(asCorInfoType); + return temp; +} + +const char* WrapICorJitInfo::getClassName(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getClassName); + const char* result = wrapHnd->getClassName(cls); + API_LEAVE(getClassName); + return result; +} + +int WrapICorJitInfo::appendClassName( + __deref_inout_ecount(*pnBufLen) WCHAR** ppBuf, + int* pnBufLen, + CORINFO_CLASS_HANDLE cls, + BOOL fNamespace, + BOOL fFullInst, + BOOL fAssembly) +{ + API_ENTER(appendClassName); + WCHAR* pBuf = *ppBuf; + int nLen = wrapHnd->appendClassName(ppBuf, pnBufLen, cls, fNamespace, fFullInst, fAssembly); + API_LEAVE(appendClassName); + return nLen; +} + +BOOL WrapICorJitInfo::isValueClass(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(isValueClass); + BOOL temp = wrapHnd->isValueClass(cls); + API_LEAVE(isValueClass); + return temp; +} + +BOOL WrapICorJitInfo::canInlineTypeCheckWithObjectVTable(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(canInlineTypeCheckWithObjectVTable); + BOOL temp = wrapHnd->canInlineTypeCheckWithObjectVTable(cls); + API_LEAVE(canInlineTypeCheckWithObjectVTable); + return temp; +} + +DWORD WrapICorJitInfo::getClassAttribs( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getClassAttribs); + DWORD temp = wrapHnd->getClassAttribs(cls); + API_LEAVE(getClassAttribs); + return temp; +} + +BOOL WrapICorJitInfo::isStructRequiringStackAllocRetBuf(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(isStructRequiringStackAllocRetBuf); + BOOL temp = wrapHnd->isStructRequiringStackAllocRetBuf(cls); + API_LEAVE(isStructRequiringStackAllocRetBuf); + return temp; +} + +CORINFO_MODULE_HANDLE WrapICorJitInfo::getClassModule( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getClassModule); + CORINFO_MODULE_HANDLE result = wrapHnd->getClassModule(cls); + API_LEAVE(getClassModule); + return result; +} + +CORINFO_ASSEMBLY_HANDLE WrapICorJitInfo::getModuleAssembly( + CORINFO_MODULE_HANDLE mod) +{ + API_ENTER(getModuleAssembly); + CORINFO_ASSEMBLY_HANDLE result = wrapHnd->getModuleAssembly(mod); + API_LEAVE(getModuleAssembly); + return result; +} + +const char* WrapICorJitInfo::getAssemblyName( + CORINFO_ASSEMBLY_HANDLE assem) +{ + API_ENTER(getAssemblyName); + const char* result = wrapHnd->getAssemblyName(assem); + API_LEAVE(getAssemblyName); + return result; +} + +void* WrapICorJitInfo::LongLifetimeMalloc(size_t sz) +{ + API_ENTER(LongLifetimeMalloc); + void* result = wrapHnd->LongLifetimeMalloc(sz); + API_LEAVE(LongLifetimeMalloc); + return result; +} + +void WrapICorJitInfo::LongLifetimeFree(void* obj) +{ + API_ENTER(LongLifetimeFree); + wrapHnd->LongLifetimeFree(obj); + API_LEAVE(LongLifetimeFree); +} + +size_t WrapICorJitInfo::getClassModuleIdForStatics( + CORINFO_CLASS_HANDLE cls, + CORINFO_MODULE_HANDLE *pModule, + void **ppIndirection) +{ + API_ENTER(getClassModuleIdForStatics); + size_t temp = wrapHnd->getClassModuleIdForStatics(cls, pModule, ppIndirection); + API_LEAVE(getClassModuleIdForStatics); + return temp; +} + +unsigned WrapICorJitInfo::getClassSize(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getClassSize); + unsigned temp = wrapHnd->getClassSize(cls); + API_LEAVE(getClassSize); + return temp; +} + +unsigned WrapICorJitInfo::getClassAlignmentRequirement( + CORINFO_CLASS_HANDLE cls, + BOOL fDoubleAlignHint) +{ + API_ENTER(getClassAlignmentRequirement); + unsigned temp = wrapHnd->getClassAlignmentRequirement(cls, fDoubleAlignHint); + API_LEAVE(getClassAlignmentRequirement); + return temp; +} + 
+unsigned WrapICorJitInfo::getClassGClayout( + CORINFO_CLASS_HANDLE cls, /* IN */ + BYTE *gcPtrs /* OUT */) +{ + API_ENTER(getClassGClayout); + unsigned temp = wrapHnd->getClassGClayout(cls, gcPtrs); + API_LEAVE(getClassGClayout); + return temp; +} + +unsigned WrapICorJitInfo::getClassNumInstanceFields( + CORINFO_CLASS_HANDLE cls /* IN */) +{ + API_ENTER(getClassNumInstanceFields); + unsigned temp = wrapHnd->getClassNumInstanceFields(cls); + API_LEAVE(getClassNumInstanceFields); + return temp; +} + +CORINFO_FIELD_HANDLE WrapICorJitInfo::getFieldInClass( + CORINFO_CLASS_HANDLE clsHnd, + INT num) +{ + API_ENTER(getFieldInClass); + CORINFO_FIELD_HANDLE temp = wrapHnd->getFieldInClass(clsHnd, num); + API_LEAVE(getFieldInClass); + return temp; +} + +BOOL WrapICorJitInfo::checkMethodModifier( + CORINFO_METHOD_HANDLE hMethod, + LPCSTR modifier, + BOOL fOptional) +{ + API_ENTER(checkMethodModifier); + BOOL result = wrapHnd->checkMethodModifier(hMethod, modifier, fOptional); + API_LEAVE(checkMethodModifier); + return result; +} + +CorInfoHelpFunc WrapICorJitInfo::getNewHelper( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + CORINFO_METHOD_HANDLE callerHandle) +{ + API_ENTER(getNewHelper); + CorInfoHelpFunc temp = wrapHnd->getNewHelper(pResolvedToken, callerHandle); + API_LEAVE(getNewHelper); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getNewArrHelper( + CORINFO_CLASS_HANDLE arrayCls) +{ + API_ENTER(getNewArrHelper); + CorInfoHelpFunc temp = wrapHnd->getNewArrHelper(arrayCls); + API_LEAVE(getNewArrHelper); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getCastingHelper( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + bool fThrowing) +{ + API_ENTER(getCastingHelper); + CorInfoHelpFunc temp = wrapHnd->getCastingHelper(pResolvedToken, fThrowing); + API_LEAVE(getCastingHelper); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getSharedCCtorHelper( + CORINFO_CLASS_HANDLE clsHnd) +{ + API_ENTER(getSharedCCtorHelper); + CorInfoHelpFunc temp = wrapHnd->getSharedCCtorHelper(clsHnd); + API_LEAVE(getSharedCCtorHelper); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getSecurityPrologHelper( + CORINFO_METHOD_HANDLE ftn) +{ + API_ENTER(getSecurityPrologHelper); + CorInfoHelpFunc temp = wrapHnd->getSecurityPrologHelper(ftn); + API_LEAVE(getSecurityPrologHelper); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getTypeForBox( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getTypeForBox); + CORINFO_CLASS_HANDLE temp = wrapHnd->getTypeForBox(cls); + API_LEAVE(getTypeForBox); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getBoxHelper( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getBoxHelper); + CorInfoHelpFunc temp = wrapHnd->getBoxHelper(cls); + API_LEAVE(getBoxHelper); + return temp; +} + +CorInfoHelpFunc WrapICorJitInfo::getUnBoxHelper( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getUnBoxHelper); + CorInfoHelpFunc temp = wrapHnd->getUnBoxHelper(cls); + API_LEAVE(getUnBoxHelper); + return temp; +} + +#if COR_JIT_EE_VERSION > 460 + +bool WrapICorJitInfo::getReadyToRunHelper( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + CORINFO_LOOKUP_KIND * pGenericLookupKind, + CorInfoHelpFunc id, + CORINFO_CONST_LOOKUP * pLookup) +{ + API_ENTER(getReadyToRunHelper); + bool result = wrapHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, id, pLookup); + API_LEAVE(getReadyToRunHelper); + return result; +} + +void WrapICorJitInfo::getReadyToRunDelegateCtorHelper( + CORINFO_RESOLVED_TOKEN * pTargetMethod, + CORINFO_CLASS_HANDLE delegateType, + CORINFO_CONST_LOOKUP * pLookup) +{ + 
API_ENTER(getReadyToRunDelegateCtorHelper); + wrapHnd->getReadyToRunDelegateCtorHelper(pTargetMethod, delegateType, pLookup); + API_LEAVE(getReadyToRunDelegateCtorHelper); +} + +#else + +void WrapICorJitInfo::getReadyToRunHelper( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + CorInfoHelpFunc id, + CORINFO_CONST_LOOKUP * pLookup) +{ + API_ENTER(getReadyToRunHelper); + wrapHnd->getReadyToRunHelper(pResolvedToken, id, pLookup); + API_LEAVE(getReadyToRunHelper); +} + +#endif + +const char* WrapICorJitInfo::getHelperName( + CorInfoHelpFunc funcNum) +{ + API_ENTER(getHelperName); + const char* temp = wrapHnd->getHelperName(funcNum); + API_LEAVE(getHelperName); + return temp; +} + +CorInfoInitClassResult WrapICorJitInfo::initClass( + CORINFO_FIELD_HANDLE field, + + CORINFO_METHOD_HANDLE method, + CORINFO_CONTEXT_HANDLE context, + BOOL speculative) +{ + API_ENTER(initClass); + CorInfoInitClassResult temp = wrapHnd->initClass(field, method, context, speculative); + API_LEAVE(initClass); + return temp; +} + +void WrapICorJitInfo::classMustBeLoadedBeforeCodeIsRun( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(classMustBeLoadedBeforeCodeIsRun); + wrapHnd->classMustBeLoadedBeforeCodeIsRun(cls); + API_LEAVE(classMustBeLoadedBeforeCodeIsRun); +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getBuiltinClass( + CorInfoClassId classId) +{ + API_ENTER(getBuiltinClass); + CORINFO_CLASS_HANDLE temp = wrapHnd->getBuiltinClass(classId); + API_LEAVE(getBuiltinClass); + return temp; +} + +CorInfoType WrapICorJitInfo::getTypeForPrimitiveValueClass( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getTypeForPrimitiveValueClass); + CorInfoType temp = wrapHnd->getTypeForPrimitiveValueClass(cls); + API_LEAVE(getTypeForPrimitiveValueClass); + return temp; +} + +BOOL WrapICorJitInfo::canCast( + CORINFO_CLASS_HANDLE child, + CORINFO_CLASS_HANDLE parent ) +{ + API_ENTER(canCast); + BOOL temp = wrapHnd->canCast(child, parent); + API_LEAVE(canCast); + return temp; +} + +BOOL WrapICorJitInfo::areTypesEquivalent( + CORINFO_CLASS_HANDLE cls1, + CORINFO_CLASS_HANDLE cls2) +{ + API_ENTER(areTypesEquivalent); + BOOL temp = wrapHnd->areTypesEquivalent(cls1, cls2); + API_LEAVE(areTypesEquivalent); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::mergeClasses( + CORINFO_CLASS_HANDLE cls1, + CORINFO_CLASS_HANDLE cls2) +{ + API_ENTER(mergeClasses); + CORINFO_CLASS_HANDLE temp = wrapHnd->mergeClasses(cls1, cls2); + API_LEAVE(mergeClasses); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getParentType( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getParentType); + CORINFO_CLASS_HANDLE temp = wrapHnd->getParentType(cls); + API_LEAVE(getParentType); + return temp; +} + +CorInfoType WrapICorJitInfo::getChildType( + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_CLASS_HANDLE *clsRet) +{ + API_ENTER(getChildType); + CorInfoType temp = wrapHnd->getChildType(clsHnd, clsRet); + API_LEAVE(getChildType); + return temp; +} + +BOOL WrapICorJitInfo::satisfiesClassConstraints( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(satisfiesClassConstraints); + BOOL temp = wrapHnd->satisfiesClassConstraints(cls); + API_LEAVE(satisfiesClassConstraints); + return temp; + +} + +BOOL WrapICorJitInfo::isSDArray( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(isSDArray); + BOOL temp = wrapHnd->isSDArray(cls); + API_LEAVE(isSDArray); + return temp; +} + +unsigned WrapICorJitInfo::getArrayRank( + CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(getArrayRank); + unsigned result = wrapHnd->getArrayRank(cls); + API_LEAVE(getArrayRank); + return result; +} + +void * 
WrapICorJitInfo::getArrayInitializationData( + CORINFO_FIELD_HANDLE field, + DWORD size) +{ + API_ENTER(getArrayInitializationData); + void *temp = wrapHnd->getArrayInitializationData(field, size); + API_LEAVE(getArrayInitializationData); + return temp; +} + +CorInfoIsAccessAllowedResult WrapICorJitInfo::canAccessClass( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + CORINFO_METHOD_HANDLE callerHandle, + CORINFO_HELPER_DESC *pAccessHelper) +{ + API_ENTER(canAccessClass); + CorInfoIsAccessAllowedResult temp = wrapHnd->canAccessClass(pResolvedToken, callerHandle, pAccessHelper); + API_LEAVE(canAccessClass); + return temp; +} + +/**********************************************************************************/ +// +// ICorFieldInfo +// +/**********************************************************************************/ + +const char* WrapICorJitInfo::getFieldName( + CORINFO_FIELD_HANDLE ftn, /* IN */ + const char **moduleName /* OUT */) +{ + API_ENTER(getFieldName); + const char* temp = wrapHnd->getFieldName(ftn, moduleName); + API_LEAVE(getFieldName); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getFieldClass( + CORINFO_FIELD_HANDLE field) +{ + API_ENTER(getFieldClass); + CORINFO_CLASS_HANDLE temp = wrapHnd->getFieldClass(field); + API_LEAVE(getFieldClass); + return temp; +} + +CorInfoType WrapICorJitInfo::getFieldType( + CORINFO_FIELD_HANDLE field, + CORINFO_CLASS_HANDLE *structType, + CORINFO_CLASS_HANDLE memberParent/* IN */) +{ + API_ENTER(getFieldType); + CorInfoType temp = wrapHnd->getFieldType(field, structType, memberParent); + API_LEAVE(getFieldType); + return temp; +} + +unsigned WrapICorJitInfo::getFieldOffset( + CORINFO_FIELD_HANDLE field) +{ + API_ENTER(getFieldOffset); + unsigned temp = wrapHnd->getFieldOffset(field); + API_LEAVE(getFieldOffset); + return temp; +} + +bool WrapICorJitInfo::isWriteBarrierHelperRequired( + CORINFO_FIELD_HANDLE field) +{ + API_ENTER(isWriteBarrierHelperRequired); + bool result = wrapHnd->isWriteBarrierHelperRequired(field); + API_LEAVE(isWriteBarrierHelperRequired); + return result; +} + +void WrapICorJitInfo::getFieldInfo(CORINFO_RESOLVED_TOKEN * pResolvedToken, + CORINFO_METHOD_HANDLE callerHandle, + CORINFO_ACCESS_FLAGS flags, + CORINFO_FIELD_INFO *pResult) +{ + API_ENTER(getFieldInfo); + wrapHnd->getFieldInfo(pResolvedToken, callerHandle, flags, pResult); + API_LEAVE(getFieldInfo); +} + +bool WrapICorJitInfo::isFieldStatic(CORINFO_FIELD_HANDLE fldHnd) +{ + API_ENTER(isFieldStatic); + bool result = wrapHnd->isFieldStatic(fldHnd); + API_LEAVE(isFieldStatic); + return result; +} + +/*********************************************************************************/ +// +// ICorDebugInfo +// +/*********************************************************************************/ + +void WrapICorJitInfo::getBoundaries( + CORINFO_METHOD_HANDLE ftn, + unsigned int *cILOffsets, + DWORD **pILOffsets, + + ICorDebugInfo::BoundaryTypes *implictBoundaries) +{ + API_ENTER(getBoundaries); + wrapHnd->getBoundaries(ftn, cILOffsets, pILOffsets, implictBoundaries); + API_LEAVE(getBoundaries); +} + +void WrapICorJitInfo::setBoundaries( + CORINFO_METHOD_HANDLE ftn, + ULONG32 cMap, + ICorDebugInfo::OffsetMapping *pMap) +{ + API_ENTER(setBoundaries); + wrapHnd->setBoundaries(ftn, cMap, pMap); + API_LEAVE(setBoundaries); +} + +void WrapICorJitInfo::getVars( + CORINFO_METHOD_HANDLE ftn, + ULONG32 *cVars, + ICorDebugInfo::ILVarInfo **vars, + bool *extendOthers) + +{ + API_ENTER(getVars); + wrapHnd->getVars(ftn, cVars, vars, extendOthers); + API_LEAVE(getVars); 
+} + +void WrapICorJitInfo::setVars( + CORINFO_METHOD_HANDLE ftn, + ULONG32 cVars, + ICorDebugInfo::NativeVarInfo *vars) + +{ + API_ENTER(setVars); + wrapHnd->setVars(ftn, cVars, vars); + API_LEAVE(setVars); +} + +void * WrapICorJitInfo::allocateArray( + ULONG cBytes) +{ + API_ENTER(allocateArray); + void *temp = wrapHnd->allocateArray(cBytes); + API_LEAVE(allocateArray); + return temp; +} + +void WrapICorJitInfo::freeArray( + void *array) +{ + API_ENTER(freeArray); + wrapHnd->freeArray(array); + API_LEAVE(freeArray); +} + +/*********************************************************************************/ +// +// ICorArgInfo +// +/*********************************************************************************/ + +CORINFO_ARG_LIST_HANDLE WrapICorJitInfo::getArgNext( + CORINFO_ARG_LIST_HANDLE args /* IN */) +{ + API_ENTER(getArgNext); + CORINFO_ARG_LIST_HANDLE temp = wrapHnd->getArgNext(args); + API_LEAVE(getArgNext); + return temp; +} + +CorInfoTypeWithMod WrapICorJitInfo::getArgType( + CORINFO_SIG_INFO* sig, /* IN */ + CORINFO_ARG_LIST_HANDLE args, /* IN */ + CORINFO_CLASS_HANDLE *vcTypeRet /* OUT */) +{ + API_ENTER(getArgType); + CorInfoTypeWithMod temp = wrapHnd->getArgType(sig, args, vcTypeRet); + API_LEAVE(getArgType); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::getArgClass( + CORINFO_SIG_INFO* sig, /* IN */ + CORINFO_ARG_LIST_HANDLE args /* IN */) +{ + API_ENTER(getArgClass); + CORINFO_CLASS_HANDLE temp = wrapHnd->getArgClass(sig, args); + API_LEAVE(getArgClass); + return temp; +} + +CorInfoType WrapICorJitInfo::getHFAType( + CORINFO_CLASS_HANDLE hClass) +{ + API_ENTER(getHFAType); + CorInfoType temp = wrapHnd->getHFAType(hClass); + API_LEAVE(getHFAType); + return temp; +} + +HRESULT WrapICorJitInfo::GetErrorHRESULT( + struct _EXCEPTION_POINTERS *pExceptionPointers) +{ + API_ENTER(GetErrorHRESULT); + HRESULT temp = wrapHnd->GetErrorHRESULT(pExceptionPointers); + API_LEAVE(GetErrorHRESULT); + return temp; +} + +ULONG WrapICorJitInfo::GetErrorMessage( + __inout_ecount(bufferLength) LPWSTR buffer, + ULONG bufferLength) +{ + API_ENTER(GetErrorMessage); + ULONG temp = wrapHnd->GetErrorMessage(buffer, bufferLength); + API_LEAVE(GetErrorMessage); + return temp; +} + +int WrapICorJitInfo::FilterException( + struct _EXCEPTION_POINTERS *pExceptionPointers) +{ + API_ENTER(FilterException); + int temp = wrapHnd->FilterException(pExceptionPointers); + API_LEAVE(FilterException); + return temp; +} + +void WrapICorJitInfo::HandleException( + struct _EXCEPTION_POINTERS *pExceptionPointers) +{ + API_ENTER(HandleException); + wrapHnd->HandleException(pExceptionPointers); + API_LEAVE(HandleException); +} + +void WrapICorJitInfo::ThrowExceptionForJitResult( + HRESULT result) +{ + API_ENTER(ThrowExceptionForJitResult); + wrapHnd->ThrowExceptionForJitResult(result); + API_LEAVE(ThrowExceptionForJitResult); +} + +void WrapICorJitInfo::ThrowExceptionForHelper( + const CORINFO_HELPER_DESC * throwHelper) +{ + API_ENTER(ThrowExceptionForHelper); + wrapHnd->ThrowExceptionForHelper(throwHelper); + API_LEAVE(ThrowExceptionForHelper); +} + +void WrapICorJitInfo::getEEInfo( + CORINFO_EE_INFO *pEEInfoOut) +{ + API_ENTER(getEEInfo); + wrapHnd->getEEInfo(pEEInfoOut); + API_LEAVE(getEEInfo); +} + +LPCWSTR WrapICorJitInfo::getJitTimeLogFilename() +{ + API_ENTER(getJitTimeLogFilename); + LPCWSTR temp = wrapHnd->getJitTimeLogFilename(); + API_LEAVE(getJitTimeLogFilename); + return temp; +} + +mdMethodDef WrapICorJitInfo::getMethodDefFromMethod( + CORINFO_METHOD_HANDLE hMethod) +{ + 
API_ENTER(getMethodDefFromMethod); + mdMethodDef result = wrapHnd->getMethodDefFromMethod(hMethod); + API_LEAVE(getMethodDefFromMethod); + return result; +} + +const char* WrapICorJitInfo::getMethodName( + CORINFO_METHOD_HANDLE ftn, /* IN */ + const char **moduleName /* OUT */) +{ + API_ENTER(getMethodName); + const char* temp = wrapHnd->getMethodName(ftn, moduleName); + API_LEAVE(getMethodName); + return temp; +} + +unsigned WrapICorJitInfo::getMethodHash( + CORINFO_METHOD_HANDLE ftn /* IN */) +{ + API_ENTER(getMethodHash); + unsigned temp = wrapHnd->getMethodHash(ftn); + API_LEAVE(getMethodHash); + return temp; +} + +size_t WrapICorJitInfo::findNameOfToken( + CORINFO_MODULE_HANDLE module, /* IN */ + mdToken metaTOK, /* IN */ + __out_ecount(FQNameCapacity) char * szFQName, /* OUT */ + size_t FQNameCapacity /* IN */) +{ + API_ENTER(findNameOfToken); + size_t result = wrapHnd->findNameOfToken(module, metaTOK, szFQName, FQNameCapacity); + API_LEAVE(findNameOfToken); + return result; +} + +#if COR_JIT_EE_VERSION > 460 + +bool WrapICorJitInfo::getSystemVAmd64PassStructInRegisterDescriptor( + /* IN */ CORINFO_CLASS_HANDLE structHnd, + /* OUT */ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr) +{ + API_ENTER(getSystemVAmd64PassStructInRegisterDescriptor); + bool result = wrapHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr); + API_LEAVE(getSystemVAmd64PassStructInRegisterDescriptor); + return result; +} + +#endif + +DWORD WrapICorJitInfo::getThreadTLSIndex( + void **ppIndirection) +{ + API_ENTER(getThreadTLSIndex); + DWORD temp = wrapHnd->getThreadTLSIndex(ppIndirection); + API_LEAVE(getThreadTLSIndex); + return temp; +} + +const void * WrapICorJitInfo::getInlinedCallFrameVptr( + void **ppIndirection) +{ + API_ENTER(getInlinedCallFrameVptr); + const void* temp = wrapHnd->getInlinedCallFrameVptr(ppIndirection); + API_LEAVE(getInlinedCallFrameVptr); + return temp; +} + +LONG * WrapICorJitInfo::getAddrOfCaptureThreadGlobal( + void **ppIndirection) +{ + API_ENTER(getAddrOfCaptureThreadGlobal); + LONG * temp = wrapHnd->getAddrOfCaptureThreadGlobal(ppIndirection); + API_LEAVE(getAddrOfCaptureThreadGlobal); + return temp; +} + +SIZE_T* WrapICorJitInfo::getAddrModuleDomainID(CORINFO_MODULE_HANDLE module) +{ + API_ENTER(getAddrModuleDomainID); + SIZE_T* result = wrapHnd->getAddrModuleDomainID(module); + API_LEAVE(getAddrModuleDomainID); + return result; +} + +void* WrapICorJitInfo::getHelperFtn( + CorInfoHelpFunc ftnNum, + void **ppIndirection) +{ + API_ENTER(getHelperFtn); + void *temp = wrapHnd->getHelperFtn(ftnNum, ppIndirection); + API_LEAVE(getHelperFtn); + return temp; +} + +void WrapICorJitInfo::getFunctionEntryPoint( + CORINFO_METHOD_HANDLE ftn, /* IN */ + CORINFO_CONST_LOOKUP * pResult, /* OUT */ + CORINFO_ACCESS_FLAGS accessFlags) +{ + API_ENTER(getFunctionEntryPoint); + wrapHnd->getFunctionEntryPoint(ftn, pResult, accessFlags); + API_LEAVE(getFunctionEntryPoint); +} + +void WrapICorJitInfo::getFunctionFixedEntryPoint( + CORINFO_METHOD_HANDLE ftn, + CORINFO_CONST_LOOKUP * pResult) +{ + API_ENTER(getFunctionFixedEntryPoint); + wrapHnd->getFunctionFixedEntryPoint(ftn, pResult); + API_LEAVE(getFunctionFixedEntryPoint); +} + +void* WrapICorJitInfo::getMethodSync( + CORINFO_METHOD_HANDLE ftn, + void **ppIndirection) +{ + API_ENTER(getMethodSync); + void *temp = wrapHnd->getMethodSync(ftn, ppIndirection); + API_LEAVE(getMethodSync); + return temp; +} + + +CorInfoHelpFunc WrapICorJitInfo::getLazyStringLiteralHelper( + 
CORINFO_MODULE_HANDLE handle) +{ + API_ENTER(getLazyStringLiteralHelper); + CorInfoHelpFunc temp = wrapHnd->getLazyStringLiteralHelper(handle); + API_LEAVE(getLazyStringLiteralHelper); + return temp; +} + +CORINFO_MODULE_HANDLE WrapICorJitInfo::embedModuleHandle( + CORINFO_MODULE_HANDLE handle, + void **ppIndirection) +{ + API_ENTER(embedModuleHandle); + CORINFO_MODULE_HANDLE temp = wrapHnd->embedModuleHandle(handle, ppIndirection); + API_LEAVE(embedModuleHandle); + return temp; +} + +CORINFO_CLASS_HANDLE WrapICorJitInfo::embedClassHandle( + CORINFO_CLASS_HANDLE handle, + void **ppIndirection) +{ + API_ENTER(embedClassHandle); + CORINFO_CLASS_HANDLE temp = wrapHnd->embedClassHandle(handle, ppIndirection); + API_LEAVE(embedClassHandle); + return temp; +} + +CORINFO_METHOD_HANDLE WrapICorJitInfo::embedMethodHandle( + CORINFO_METHOD_HANDLE handle, + void **ppIndirection) +{ + API_ENTER(embedMethodHandle); + CORINFO_METHOD_HANDLE temp = wrapHnd->embedMethodHandle(handle, ppIndirection); + API_LEAVE(embedMethodHandle); + return temp; +} + +CORINFO_FIELD_HANDLE WrapICorJitInfo::embedFieldHandle( + CORINFO_FIELD_HANDLE handle, + void **ppIndirection) +{ + API_ENTER(embedFieldHandle); + CORINFO_FIELD_HANDLE temp = wrapHnd->embedFieldHandle(handle, ppIndirection); + API_LEAVE(embedFieldHandle); + return temp; +} + +void WrapICorJitInfo::embedGenericHandle( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + BOOL fEmbedParent, + CORINFO_GENERICHANDLE_RESULT * pResult) +{ + API_ENTER(embedGenericHandle); + wrapHnd->embedGenericHandle(pResolvedToken, fEmbedParent, pResult); + API_LEAVE(embedGenericHandle); +} + +CORINFO_LOOKUP_KIND WrapICorJitInfo::getLocationOfThisType( + CORINFO_METHOD_HANDLE context) +{ + API_ENTER(getLocationOfThisType); + CORINFO_LOOKUP_KIND temp = wrapHnd->getLocationOfThisType(context); + API_LEAVE(getLocationOfThisType); + return temp; +} + +void* WrapICorJitInfo::getPInvokeUnmanagedTarget( + CORINFO_METHOD_HANDLE method, + void **ppIndirection) +{ + API_ENTER(getPInvokeUnmanagedTarget); + void *result = wrapHnd->getPInvokeUnmanagedTarget(method, ppIndirection); + API_LEAVE(getPInvokeUnmanagedTarget); + return result; +} + +void* WrapICorJitInfo::getAddressOfPInvokeFixup( + CORINFO_METHOD_HANDLE method, + void **ppIndirection) +{ + API_ENTER(getAddressOfPInvokeFixup); + void *temp = wrapHnd->getAddressOfPInvokeFixup(method, ppIndirection); + API_LEAVE(getAddressOfPInvokeFixup); + return temp; +} + +#if COR_JIT_EE_VERSION > 460 + +void WrapICorJitInfo::getAddressOfPInvokeTarget( + CORINFO_METHOD_HANDLE method, + CORINFO_CONST_LOOKUP *pLookup) +{ + API_ENTER(getAddressOfPInvokeTarget); + wrapHnd->getAddressOfPInvokeTarget(method, pLookup); + API_LEAVE(getAddressOfPInvokeTarget); +} + +#endif + +LPVOID WrapICorJitInfo::GetCookieForPInvokeCalliSig( + CORINFO_SIG_INFO* szMetaSig, + void ** ppIndirection) +{ + API_ENTER(GetCookieForPInvokeCalliSig); + LPVOID temp = wrapHnd->GetCookieForPInvokeCalliSig(szMetaSig, ppIndirection); + API_LEAVE(GetCookieForPInvokeCalliSig); + return temp; +} + +bool WrapICorJitInfo::canGetCookieForPInvokeCalliSig( + CORINFO_SIG_INFO* szMetaSig) +{ + API_ENTER(canGetCookieForPInvokeCalliSig); + bool temp = wrapHnd->canGetCookieForPInvokeCalliSig(szMetaSig); + API_LEAVE(canGetCookieForPInvokeCalliSig); + return temp; +} + +CORINFO_JUST_MY_CODE_HANDLE WrapICorJitInfo::getJustMyCodeHandle( + CORINFO_METHOD_HANDLE method, + CORINFO_JUST_MY_CODE_HANDLE**ppIndirection) +{ + API_ENTER(getJustMyCodeHandle); + CORINFO_JUST_MY_CODE_HANDLE temp = 
wrapHnd->getJustMyCodeHandle(method, ppIndirection); + API_LEAVE(getJustMyCodeHandle); + return temp; +} + +void WrapICorJitInfo::GetProfilingHandle( + BOOL *pbHookFunction, + void **pProfilerHandle, + BOOL *pbIndirectedHandles) +{ + API_ENTER(GetProfilingHandle); + wrapHnd->GetProfilingHandle(pbHookFunction, pProfilerHandle, pbIndirectedHandles); + API_LEAVE(GetProfilingHandle); +} + +void WrapICorJitInfo::getCallInfo( + CORINFO_RESOLVED_TOKEN * pResolvedToken, + CORINFO_RESOLVED_TOKEN * pConstrainedResolvedToken, + CORINFO_METHOD_HANDLE callerHandle, + CORINFO_CALLINFO_FLAGS flags, + CORINFO_CALL_INFO *pResult) +{ + API_ENTER(getCallInfo); + wrapHnd->getCallInfo(pResolvedToken, pConstrainedResolvedToken, callerHandle, flags, pResult); + API_LEAVE(getCallInfo); +} + +BOOL WrapICorJitInfo::canAccessFamily(CORINFO_METHOD_HANDLE hCaller, + CORINFO_CLASS_HANDLE hInstanceType) +{ + API_ENTER(canAccessFamily); + BOOL temp = wrapHnd->canAccessFamily(hCaller, hInstanceType); + API_LEAVE(canAccessFamily); + return temp; +} + +BOOL WrapICorJitInfo::isRIDClassDomainID(CORINFO_CLASS_HANDLE cls) +{ + API_ENTER(isRIDClassDomainID); + BOOL result = wrapHnd->isRIDClassDomainID(cls); + API_LEAVE(isRIDClassDomainID); + return result; +} + +unsigned WrapICorJitInfo::getClassDomainID( + CORINFO_CLASS_HANDLE cls, + void **ppIndirection) +{ + API_ENTER(getClassDomainID); + unsigned temp = wrapHnd->getClassDomainID(cls, ppIndirection); + API_LEAVE(getClassDomainID); + return temp; +} + +void* WrapICorJitInfo::getFieldAddress( + CORINFO_FIELD_HANDLE field, + void **ppIndirection) +{ + API_ENTER(getFieldAddress); + void *temp = wrapHnd->getFieldAddress(field, ppIndirection); + API_LEAVE(getFieldAddress); + return temp; +} + +CORINFO_VARARGS_HANDLE WrapICorJitInfo::getVarArgsHandle( + CORINFO_SIG_INFO *pSig, + void **ppIndirection) +{ + API_ENTER(getVarArgsHandle); + CORINFO_VARARGS_HANDLE temp = wrapHnd->getVarArgsHandle(pSig, ppIndirection); + API_LEAVE(getVarArgsHandle); + return temp; +} + +bool WrapICorJitInfo::canGetVarArgsHandle( + CORINFO_SIG_INFO *pSig) +{ + API_ENTER(canGetVarArgsHandle); + bool temp = wrapHnd->canGetVarArgsHandle(pSig); + API_LEAVE(canGetVarArgsHandle); + return temp; +} + +InfoAccessType WrapICorJitInfo::constructStringLiteral( + CORINFO_MODULE_HANDLE module, + mdToken metaTok, + void **ppValue) +{ + API_ENTER(constructStringLiteral); + InfoAccessType temp = wrapHnd->constructStringLiteral(module, metaTok, ppValue); + API_LEAVE(constructStringLiteral); + return temp; +} + +InfoAccessType WrapICorJitInfo::emptyStringLiteral(void **ppValue) +{ + API_ENTER(emptyStringLiteral); + InfoAccessType temp = wrapHnd->emptyStringLiteral(ppValue); + API_LEAVE(emptyStringLiteral); + return temp; +} + +DWORD WrapICorJitInfo::getFieldThreadLocalStoreID( + CORINFO_FIELD_HANDLE field, + void **ppIndirection) +{ + API_ENTER(getFieldThreadLocalStoreID); + DWORD temp = wrapHnd->getFieldThreadLocalStoreID(field, ppIndirection); + API_LEAVE(getFieldThreadLocalStoreID); + return temp; +} + +void WrapICorJitInfo::setOverride( + ICorDynamicInfo *pOverride, + CORINFO_METHOD_HANDLE currentMethod) +{ + API_ENTER(setOverride); + wrapHnd->setOverride(pOverride, currentMethod); + API_LEAVE(setOverride); +} + +void WrapICorJitInfo::addActiveDependency( + CORINFO_MODULE_HANDLE moduleFrom, + CORINFO_MODULE_HANDLE moduleTo) +{ + API_ENTER(addActiveDependency); + wrapHnd->addActiveDependency(moduleFrom, moduleTo); + API_LEAVE(addActiveDependency); +} + +CORINFO_METHOD_HANDLE WrapICorJitInfo::GetDelegateCtor( + 
CORINFO_METHOD_HANDLE methHnd, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE targetMethodHnd, + DelegateCtorArgs * pCtorData) +{ + API_ENTER(GetDelegateCtor); + CORINFO_METHOD_HANDLE temp = wrapHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, pCtorData); + API_LEAVE(GetDelegateCtor); + return temp; +} + +void WrapICorJitInfo::MethodCompileComplete( + CORINFO_METHOD_HANDLE methHnd) +{ + API_ENTER(MethodCompileComplete); + wrapHnd->MethodCompileComplete(methHnd); + API_LEAVE(MethodCompileComplete); +} + +void* WrapICorJitInfo::getTailCallCopyArgsThunk( + CORINFO_SIG_INFO *pSig, + CorInfoHelperTailCallSpecialHandling flags) +{ + API_ENTER(getTailCallCopyArgsThunk); + void *result = wrapHnd->getTailCallCopyArgsThunk(pSig, flags); + API_LEAVE(getTailCallCopyArgsThunk); + return result; +} + +/*********************************************************************************/ +// +// ICorJitInfo +// +/*********************************************************************************/ + +#if COR_JIT_EE_VERSION > 460 + +DWORD WrapICorJitInfo::getJitFlags(CORJIT_FLAGS *jitFlags, DWORD sizeInBytes) +{ + API_ENTER(getJitFlags); + DWORD result = wrapHnd->getJitFlags(jitFlags, sizeInBytes); + API_LEAVE(getJitFlags); + return result; +} + +bool WrapICorJitInfo::runWithErrorTrap(void(*function)(void*), void *param) +{ + return wrapHnd->runWithErrorTrap(function, param); +} + +#endif + +IEEMemoryManager* WrapICorJitInfo::getMemoryManager() +{ + API_ENTER(getMemoryManager); + IEEMemoryManager * temp = wrapHnd->getMemoryManager(); + API_LEAVE(getMemoryManager); + return temp; +} + +void WrapICorJitInfo::allocMem( + ULONG hotCodeSize, /* IN */ + ULONG coldCodeSize, /* IN */ + ULONG roDataSize, /* IN */ + ULONG xcptnsCount, /* IN */ + CorJitAllocMemFlag flag, /* IN */ + void ** hotCodeBlock, /* OUT */ + void ** coldCodeBlock, /* OUT */ + void ** roDataBlock /* OUT */) +{ + API_ENTER(allocMem); + wrapHnd->allocMem(hotCodeSize, coldCodeSize, roDataSize, xcptnsCount, flag, hotCodeBlock, coldCodeBlock, roDataBlock); + API_LEAVE(allocMem); +} + +void WrapICorJitInfo::reserveUnwindInfo( + BOOL isFunclet, /* IN */ + BOOL isColdCode, /* IN */ + ULONG unwindSize /* IN */) +{ + API_ENTER(reserveUnwindInfo); + wrapHnd->reserveUnwindInfo(isFunclet, isColdCode, unwindSize); + API_LEAVE(reserveUnwindInfo); +} + +void WrapICorJitInfo::allocUnwindInfo( + BYTE * pHotCode, /* IN */ + BYTE * pColdCode, /* IN */ + ULONG startOffset, /* IN */ + ULONG endOffset, /* IN */ + ULONG unwindSize, /* IN */ + BYTE * pUnwindBlock, /* IN */ + CorJitFuncKind funcKind /* IN */) +{ + API_ENTER(allocUnwindInfo); + wrapHnd->allocUnwindInfo(pHotCode, pColdCode, startOffset, endOffset, unwindSize, pUnwindBlock, funcKind); + API_LEAVE(allocUnwindInfo); +} + +void *WrapICorJitInfo::allocGCInfo(size_t size /* IN */) +{ + API_ENTER(allocGCInfo); + void *temp = wrapHnd->allocGCInfo(size); + API_LEAVE(allocGCInfo); + return temp; +} + +void WrapICorJitInfo::yieldExecution() +{ + API_ENTER(yieldExecution); //Nothing to record + wrapHnd->yieldExecution(); + API_LEAVE(yieldExecution); //Nothing to recor) +} + +void WrapICorJitInfo::setEHcount(unsigned cEH /* IN */) +{ + API_ENTER(setEHcount); + wrapHnd->setEHcount(cEH); + API_LEAVE(setEHcount); +} + +void WrapICorJitInfo::setEHinfo( + unsigned EHnumber, /* IN */ + const CORINFO_EH_CLAUSE *clause /* IN */) +{ + API_ENTER(setEHinfo); + wrapHnd->setEHinfo(EHnumber, clause); + API_LEAVE(setEHinfo); +} + +BOOL WrapICorJitInfo::logMsg(unsigned level, const char* fmt, va_list args) +{ + 
API_ENTER(logMsg); + BOOL result = wrapHnd->logMsg(level, fmt, args); + API_LEAVE(logMsg); + return result; +} + +int WrapICorJitInfo::doAssert(const char* szFile, int iLine, const char* szExpr) +{ + API_ENTER(doAssert); + int result = wrapHnd->doAssert(szFile, iLine, szExpr); + API_LEAVE(doAssert); + return result; +} + +void WrapICorJitInfo::reportFatalError(CorJitResult result) +{ + API_ENTER(reportFatalError); + wrapHnd->reportFatalError(result); + API_LEAVE(reportFatalError); +} + +HRESULT WrapICorJitInfo::allocBBProfileBuffer( + ULONG count, + ProfileBuffer **profileBuffer) +{ + API_ENTER(allocBBProfileBuffer); + HRESULT result = wrapHnd->allocBBProfileBuffer(count, profileBuffer); + API_LEAVE(allocBBProfileBuffer); + return result; +} + +HRESULT WrapICorJitInfo::getBBProfileData( + CORINFO_METHOD_HANDLE ftnHnd, + ULONG *count, + ProfileBuffer **profileBuffer, + ULONG *numRuns) +{ + API_ENTER(getBBProfileData); + HRESULT temp = wrapHnd->getBBProfileData(ftnHnd, count, profileBuffer, numRuns); + API_LEAVE(getBBProfileData); + return temp; +} + +void WrapICorJitInfo::recordCallSite( + ULONG instrOffset, /* IN */ + CORINFO_SIG_INFO * callSig, /* IN */ + CORINFO_METHOD_HANDLE methodHandle /* IN */) +{ + API_ENTER(recordCallSite); + wrapHnd->recordCallSite(instrOffset, callSig, methodHandle); + API_LEAVE(recordCallSite); +} + +void WrapICorJitInfo::recordRelocation( + void *location, /* IN */ + void *target, /* IN */ + WORD fRelocType, /* IN */ + WORD slotNum, /* IN */ + INT32 addlDelta /* IN */) +{ + API_ENTER(recordRelocation); + wrapHnd->recordRelocation(location, target, fRelocType, slotNum, addlDelta); + API_LEAVE(recordRelocation); +} + +WORD WrapICorJitInfo::getRelocTypeHint(void *target) +{ + API_ENTER(getRelocTypeHint); + WORD result = wrapHnd->getRelocTypeHint(target); + API_LEAVE(getRelocTypeHint); + return result; +} + +void WrapICorJitInfo::getModuleNativeEntryPointRange( + void **pStart, /* OUT */ + void **pEnd /* OUT */) +{ + API_ENTER(getModuleNativeEntryPointRange); + wrapHnd->getModuleNativeEntryPointRange(pStart, pEnd); + API_LEAVE(getModuleNativeEntryPointRange); +} + +DWORD WrapICorJitInfo::getExpectedTargetArchitecture() +{ + API_ENTER(getExpectedTargetArchitecture); + DWORD result = wrapHnd->getExpectedTargetArchitecture(); + API_LEAVE(getExpectedTargetArchitecture); + return result; +} + +/**********************************************************************************/ +// clang-format on +/**********************************************************************************/ diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index fe35c3b780..cb0832fe47 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -1100,11 +1100,6 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, CNS_COMMON: { - // TODO-1stClassStructs: handle constant propagation to struct types. - if (varTypeIsStruct(lclVar)) - { - goto DONE_ASSERTION; - } // // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion // @@ -2034,12 +2029,7 @@ void Compiler::optAssertionGen(GenTreePtr tree) { case GT_ASG: // VN takes care of non local assertions for assignments and data flow. - // TODO-1stClassStructs: Enable assertion prop for struct types. - if (varTypeIsStruct(tree)) - { - // Do nothing. 
- } - else if (optLocalAssertionProp) + if (optLocalAssertionProp) { assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL); } @@ -2052,26 +2042,15 @@ void Compiler::optAssertionGen(GenTreePtr tree) case GT_OBJ: case GT_BLK: case GT_DYN_BLK: - // TODO-1stClassStructs: These should always be considered to create a non-null - // assertion, but previously, when these indirections were implicit due to a block - // copy or init, they were not being considered to do so. - break; case GT_IND: - // TODO-1stClassStructs: All indirections should be considered to create a non-null - // assertion, but previously, when these indirections were implicit due to a block - // copy or init, they were not being considered to do so. - if (tree->gtType == TYP_STRUCT) - { - GenTree* parent = tree->gtGetParent(nullptr); - if ((parent != nullptr) && (parent->gtOper == GT_ASG)) - { - break; - } - } case GT_NULLCHECK: + // All indirections create non-null assertions + assertionIndex = optCreateAssertion(tree->AsIndir()->Addr(), nullptr, OAK_NOT_EQUAL); + break; + case GT_ARR_LENGTH: - // An array length can create a non-null assertion - assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL); + // An array length is an indirection (but doesn't derive from GenTreeIndir). + assertionIndex = optCreateAssertion(tree->AsArrLen()->ArrRef(), nullptr, OAK_NOT_EQUAL); break; case GT_ARR_BOUNDS_CHECK: @@ -2629,9 +2608,29 @@ GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion, else { bool isArrIndex = ((tree->gtFlags & GTF_VAR_ARR_INDEX) != 0); - newTree->ChangeOperConst(GT_CNS_INT); - newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal; - newTree->ClearIconHandleMask(); + // If we have done constant propagation of a struct type, it is only valid for zero-init, + // and we have to ensure that we have the right zero for the type. + if (varTypeIsStruct(tree)) + { + assert(curAssertion->op2.u1.iconVal == 0); + } +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(tree)) + { + var_types simdType = tree->TypeGet(); + tree->ChangeOperConst(GT_CNS_DBL); + GenTree* initVal = tree; + initVal->gtType = TYP_FLOAT; + newTree = + gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_FLOAT, genTypeSize(simdType)); + } + else +#endif // FEATURE_SIMD + { + newTree->ChangeOperConst(GT_CNS_INT); + newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal; + newTree->ClearIconHandleMask(); + } // If we're doing an array index address, assume any constant propagated contributes to the index. if (isArrIndex) { @@ -3421,32 +3420,13 @@ GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const Gen { assert(tree->OperIsIndir()); - // TODO-1stClassStructs: All indirections should be handled here, but - // previously, when these indirections were GT_OBJ, or implicit due to a block - // copy or init, they were not being handled. - if (tree->TypeGet() == TYP_STRUCT) - { - if (tree->OperIsBlk()) - { - return nullptr; - } - else - { - GenTree* parent = tree->gtGetParent(nullptr); - if ((parent != nullptr) && parent->OperIsBlkOp()) - { - return nullptr; - } - } - } - if (!(tree->gtFlags & GTF_EXCEPT)) { return nullptr; } // Check for add of a constant. 
- GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op1 = tree->AsIndir()->Addr(); if ((op1->gtOper == GT_ADD) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT)) { op1 = op1->gtOp.gtOp1; @@ -3700,6 +3680,21 @@ GenTreePtr Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const assert(tree->gtOper == GT_ARR_BOUNDS_CHECK); +#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS + if (JitConfig.JitNoRangeChks()) + { +#ifdef DEBUG + if (verbose) + { + printf("\nFlagging check redundant due to JitNoRangeChks in BB%02u:\n", compCurBB->bbNum); + gtDispTree(tree, nullptr, nullptr, true); + } +#endif // DEBUG + tree->gtFlags |= GTF_ARR_BOUND_INBND; + return nullptr; + } +#endif // FEATURE_ENABLE_NO_RANGE_CHECKS + BitVecOps::Iter iter(apTraits, assertions); unsigned index = 0; while (iter.NextElem(apTraits, &index)) @@ -4688,9 +4683,8 @@ GenTreePtr Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt newStmt = fgInsertStmtNearEnd(block, sideEffList); sideEffList = nullptr; } - fgMorphBlockStmt(block, newStmt DEBUGARG(__FUNCTION__)); - gtSetStmtInfo(newStmt); - fgSetStmtSeq(newStmt); + + fgMorphBlockStmt(block, newStmt->AsStmt() DEBUGARG(__FUNCTION__)); } // Transform the relop's operands to be both zeroes. @@ -4748,7 +4742,6 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Gen case GT_MOD: case GT_UDIV: case GT_UMOD: - case GT_MULHI: case GT_EQ: case GT_NE: case GT_LT: @@ -4767,6 +4760,10 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Gen case GT_INTRINSIC: break; + case GT_MULHI: + assert(false && "Unexpected GT_MULHI node encountered before lowering"); + break; + case GT_JTRUE: break; @@ -4911,9 +4908,7 @@ GenTreePtr Compiler::optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stm if (optAssertionPropagatedCurrentStmt) { - fgMorphBlockStmt(block, stmt DEBUGARG("optVNAssertionPropCurStmt")); - gtSetStmtInfo(stmt); - fgSetStmtSeq(stmt); + fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("optVNAssertionPropCurStmt")); } // Check if propagation removed statements starting from current stmt. @@ -5110,13 +5105,7 @@ void Compiler::optAssertionPropMain() } #endif // Re-morph the statement. - fgMorphBlockStmt(block, stmt DEBUGARG("optAssertionPropMain")); - - // Recalculate the gtCostSz, etc... - gtSetStmtInfo(stmt); - - // Re-thread the nodes - fgSetStmtSeq(stmt); + fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("optAssertionPropMain")); } // Check if propagation removed statements starting from current stmt. 
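Taken together, the assertionprop.cpp hunks above drop the old struct special cases: every explicit indirection (GT_OBJ, GT_BLK, GT_DYN_BLK, GT_IND, GT_NULLCHECK) now produces a non-null assertion on its address operand, and GT_ARR_LENGTH does the same on its array reference. A condensed sketch of the resulting optAssertionGen dispatch, assembled from the hunks above rather than quoted verbatim (the surrounding function body is elided):

    switch (tree->OperGet())
    {
        case GT_OBJ:
        case GT_BLK:
        case GT_DYN_BLK:
        case GT_IND:
        case GT_NULLCHECK:
            // Any indirection implies its address is non-null on the fall-through path.
            assertionIndex = optCreateAssertion(tree->AsIndir()->Addr(), nullptr, OAK_NOT_EQUAL);
            break;

        case GT_ARR_LENGTH:
            // An array length read is also an indirection, but GT_ARR_LENGTH does not
            // derive from GenTreeIndir, so the array reference comes from ArrRef().
            assertionIndex = optCreateAssertion(tree->AsArrLen()->ArrRef(), nullptr, OAK_NOT_EQUAL);
            break;

        // ... remaining cases unchanged ...
    }

On the propagation side, optAssertionProp_Ind loses its TYP_STRUCT early-outs for the same reason and reads the address through tree->AsIndir()->Addr() instead of tree->gtOp.gtOp1.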
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h index 150f7e9d61..243e9e33b4 100644 --- a/src/jit/bitsetasuint64.h +++ b/src/jit/bitsetasuint64.h @@ -167,7 +167,7 @@ public: { IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env); const int CharsForUINT64 = sizeof(UINT64) * 2; - char* res = NULL; + char* res = nullptr; const int AllocSize = CharsForUINT64 + 4; res = (char*)alloc->Alloc(AllocSize); UINT64 bits = bs; diff --git a/src/jit/block.cpp b/src/jit/block.cpp index 2d37754ec5..47f1052cc8 100644 --- a/src/jit/block.cpp +++ b/src/jit/block.cpp @@ -554,7 +554,9 @@ void BasicBlock::dspBlockHeader(Compiler* compiler, } if (showFlags) { - printf(" flags=0x%08x: ", bbFlags); + const unsigned lowFlags = (unsigned)bbFlags; + const unsigned highFlags = (unsigned)(bbFlags >> 32); + printf(" flags=0x%08x.%08x: ", highFlags, lowFlags); dspFlags(); } printf("\n"); @@ -568,7 +570,25 @@ void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp) return comp->compGetMem(sz, CMK_HeapPhiArg); } -void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from) +//------------------------------------------------------------------------ +// CloneBlockState: Try to populate `to` block with a copy of `from` block's statements, replacing +// uses of local `varNum` with IntCns `varVal`. +// +// Arguments: +// compiler - Jit compiler instance +// to - New/empty block to copy statements into +// from - Block to copy statements from +// varNum - lclVar uses with lclNum `varNum` will be replaced; can be ~0 to indicate no replacement. +// varVal - If replacing uses of `varNum`, replace them with int constants with value `varVal`. +// +// Return Value: +// Cloning may fail because this routine uses `gtCloneExpr` for cloning and it can't handle all +// IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be +// partially populated. If cloning of all statements succeeds, `true` will be returned and +// block `to` will be fully populated. + +bool BasicBlock::CloneBlockState( + Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal) { assert(to->bbTreeList == nullptr); @@ -595,9 +615,17 @@ void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const Basic for (GenTreePtr fromStmt = from->bbTreeList; fromStmt != nullptr; fromStmt = fromStmt->gtNext) { - compiler->fgInsertStmtAtEnd(to, - compiler->fgNewStmtFromTree(compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr))); + auto newExpr = compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr, 0, varNum, varVal); + if (!newExpr) + { + // gtCloneExpr doesn't handle all opcodes, so may fail to clone a statement. + // When that happens, it returns nullptr; abandon the rest of this block and + // return `false` to the caller to indicate that cloning was unsuccessful. + return false; + } + compiler->fgInsertStmtAtEnd(to, compiler->fgNewStmtFromTree(newExpr)); } + return true; } // LIR helpers @@ -667,7 +695,6 @@ GenTreeStmt* BasicBlock::lastStmt() return result->AsStmt(); } - //------------------------------------------------------------------------ // BasicBlock::firstNode: Returns the first node in the block. 
// diff --git a/src/jit/block.h b/src/jit/block.h index ecfbb620a1..99c0efc1a7 100644 --- a/src/jit/block.h +++ b/src/jit/block.h @@ -30,17 +30,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "simplerhash.h" /*****************************************************************************/ - +typedef BitVec EXPSET_TP; #if LARGE_EXPSET -typedef unsigned __int64 EXPSET_TP; #define EXPSET_SZ 64 #else -typedef unsigned int EXPSET_TP; #define EXPSET_SZ 32 #endif -#define EXPSET_ALL ((EXPSET_TP)0 - 1) - typedef BitVec ASSERT_TP; typedef BitVec_ValArg_T ASSERT_VALARG_TP; typedef BitVec_ValRet_T ASSERT_VALRET_TP; @@ -291,14 +287,14 @@ struct BasicBlock : private LIR::Range } } + unsigned __int64 bbFlags; // see BBF_xxxx below + unsigned bbNum; // the block's number unsigned bbPostOrderNum; // the block's post order number in the graph. unsigned bbRefs; // number of blocks that can reach here, either by fall-through or a branch. If this falls to zero, // the block is unreachable. - unsigned bbFlags; // see BBF_xxxx below - #define BBF_VISITED 0x00000001 // BB visited during optimizations #define BBF_MARKED 0x00000002 // BB marked during optimizations #define BBF_CHANGED 0x00000004 // input/output of this block has changed @@ -357,6 +353,10 @@ struct BasicBlock : private LIR::Range // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a // finally. +// Flags that relate blocks to loop structure. + +#define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1) + bool isRunRarely() { return ((bbFlags & BBF_RUN_RARELY) != 0); @@ -860,9 +860,7 @@ struct BasicBlock : private LIR::Range unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block. unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block. -#ifdef DEBUGGING_SUPPORT VARSET_TP bbScope; // variables in scope over the block -#endif void InitVarSets(class Compiler* comp); @@ -1094,9 +1092,11 @@ public: return AllSuccs(comp, this); } - // Clone block state and statements from 'from' block to 'to' block. - // Assumes that "to" is an empty block. - static void CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from); + // Try to clone block state and statements from `from` block to `to` block (which must be new/empty), + // optionally replacing uses of local `varNum` with IntCns `varVal`. Return true if all statements + // in the block are cloned successfully, false (with partially-populated `to` block) if one fails. + static bool CloneBlockState( + Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum = (unsigned)-1, int varVal = 0); void MakeLIR(GenTree* firstNode, GenTree* lastNode); bool IsLIR(); diff --git a/src/jit/codegen.h b/src/jit/codegen.h index 0c4a311186..c6e38ab6af 100755 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -48,7 +48,6 @@ public: unsigned* cnsPtr, bool nogen = false); - private: #if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87 // Bit masks used in negating a float or double number. 
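The CloneBlockState change above (block.cpp/block.h) turns the helper into a best-effort operation: it can substitute an integer constant for a given local while cloning, and it returns false, leaving `to` only partially populated, when gtCloneExpr cannot handle a node. A hedged caller sketch of the new contract follows; the in-scope variables `compiler`, `to`, `from` and the choice of local V03 are illustrative assumptions, not taken from this patch:

    // Clone `from` into the fresh, empty block `to`, replacing uses of local V03
    // with the constant 0. Omitting varNum (default (unsigned)-1) keeps the old
    // plain-copy behavior.
    const unsigned lclNum = 3; // hypothetical local being specialized
    if (!BasicBlock::CloneBlockState(compiler, to, from, lclNum, 0))
    {
        // Cloning failed part-way through; `to` may already hold some statements,
        // so the caller must discard the block (or abandon the whole transformation)
        // rather than use it.
        return false;
    }

The constant-replacement hook is presumably aimed at transformations such as loop unrolling that re-materialize a body once per known iteration value; the previous void-returning form could simply assume success.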
@@ -123,7 +122,7 @@ private: void genRangeCheck(GenTree* node); - void genLockedInstructions(GenTree* node); + void genLockedInstructions(GenTreeOp* node); //------------------------------------------------------------------------- // Register-related methods @@ -251,6 +250,8 @@ protected: void genAdjustSP(ssize_t delta); + void genAdjustStackLevel(BasicBlock* block); + void genExitCode(BasicBlock* block); //------------------------------------------------------------------------- @@ -488,15 +489,26 @@ protected: void genAmd64EmitterUnitTests(); #endif -//------------------------------------------------------------------------- -// -// End prolog/epilog generation -// -//------------------------------------------------------------------------- + //------------------------------------------------------------------------- + // + // End prolog/epilog generation + // + //------------------------------------------------------------------------- -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/*****************************************************************************/ + void genSinglePush(); + void genSinglePop(); + regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs); + void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs); + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Debugging Support XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ #ifdef DEBUG void genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping); @@ -730,10 +742,6 @@ protected: unsigned genTrnslLocalVarCount; #endif -/*****************************************************************************/ -#endif // DEBUGGING_SUPPORT -/*****************************************************************************/ - #ifndef LEGACY_BACKEND #include "codegenlinear.h" #else // LEGACY_BACKEND diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 4ce82307f9..73e51f2ef7 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -27,102 +27,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfoencoder.h" #endif -// Get the register assigned to the given node - -regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) -{ - return tree->gtRegNum; -} - -//------------------------------------------------------------------------ -// genSpillVar: Spill a local variable -// -// Arguments: -// tree - the lclVar node for the variable being spilled -// -// Return Value: -// None. 
-// -// Assumptions: -// The lclVar must be a register candidate (lvRegCandidate) - -void CodeGen::genSpillVar(GenTreePtr tree) -{ - regMaskTP regMask; - unsigned varNum = tree->gtLclVarCommon.gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - // We don't actually need to spill if it is already living in memory - bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); - if (needsSpill) - { - bool restoreRegVar = false; - if (tree->gtOper == GT_REG_VAR) - { - tree->SetOper(GT_LCL_VAR); - restoreRegVar = true; - } - - // mask off the flag to generate the right spill code, then bring it back - tree->gtFlags &= ~GTF_REG_VAL; - - instruction storeIns = ins_Store(tree->TypeGet()); - - if (varTypeIsMultiReg(tree)) - { - assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); - assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); - regNumber regLo = genRegPairLo(tree->gtRegPair); - regNumber regHi = genRegPairHi(tree->gtRegPair); - inst_TT_RV(storeIns, tree, regLo); - inst_TT_RV(storeIns, tree, regHi, 4); - } - else - { - assert(varDsc->lvRegNum == tree->gtRegNum); - inst_TT_RV(storeIns, tree, tree->gtRegNum); - } - tree->gtFlags |= GTF_REG_VAL; - - if (restoreRegVar) - { - tree->SetOper(GT_REG_VAR); - } - - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); - gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); - - if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) - { -#ifdef DEBUG - if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); - } - else - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); - } -#endif - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - - tree->gtFlags &= ~GTF_SPILL; - varDsc->lvRegNum = REG_STK; - if (varTypeIsMultiReg(tree)) - { - varDsc->lvOtherReg = REG_STK; - } -} - -// inline -void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree) -{ - assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); - varDsc->lvRegNum = tree->gtRegNum; -} - /***************************************************************************** * * Generate code that will set the given register to the integer constant. @@ -157,735 +61,22 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla */ void CodeGen::genEmitGSCookieCheck(bool pushReg) { - NYI("ARM genEmitGSCookieCheck is not yet implemented for protojit"); + NYI("ARM genEmitGSCookieCheck"); } -/***************************************************************************** - * - * Generate code for all the basic blocks in the function. - */ - -void CodeGen::genCodeForBBlist() +BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk) { - unsigned varNum; - LclVarDsc* varDsc; - - unsigned savedStkLvl; - -#ifdef DEBUG - genInterruptibleUsed = true; - - // You have to be careful if you create basic blocks from now on - compiler->fgSafeBasicBlockCreation = false; - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnCall) - { - compiler->opts.compStackCheckOnCall = false; - } - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnRet) - { - compiler->opts.compStackCheckOnRet = false; - } -#endif - - // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. 
- genPrepForEHCodegen(); - - assert(!compiler->fgFirstBBScratch || - compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. - - /* Initialize the spill tracking logic */ - - regSet.rsSpillBeg(); - -#ifdef DEBUGGING_SUPPORT - /* Initialize the line# tracking logic */ - - if (compiler->opts.compScopeInfo) - { - siInit(); - } -#endif - - if (compiler->opts.compDbgEnC) - { - noway_assert(isFramePointerUsed()); - regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); - } - - /* If we have any pinvoke calls, we might potentially trash everything */ - if (compiler->info.compCallUnmanaged) - { - noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame - regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE); - } - - genPendingCallLabel = nullptr; - - /* Initialize the pointer tracking code */ - - gcInfo.gcRegPtrSetInit(); - gcInfo.gcVarPtrSetInit(); - - /* If any arguments live in registers, mark those regs as such */ - - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) - { - /* Is this variable a parameter assigned to a register? */ - - if (!varDsc->lvIsParam || !varDsc->lvRegister) - continue; - - /* Is the argument live on entry to the method? */ - - if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) - continue; - - /* Is this a floating-point argument? */ - - if (varDsc->IsFloatRegType()) - continue; - - noway_assert(!varTypeIsFloating(varDsc->TypeGet())); - - /* Mark the register as holding the variable */ - - regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); - } - - unsigned finallyNesting = 0; - - // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without - // allocation at the start of each basic block. - VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); - - /*------------------------------------------------------------------------- - * - * Walk the basic blocks and generate code for each one - * - */ - - BasicBlock* block; - BasicBlock* lblk; /* previous block */ - - for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) - { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\n=============== Generating "); - block->dspBlockHeader(compiler, true, true); - compiler->fgDispBBLiveness(block); - } -#endif // DEBUG - - /* Figure out which registers hold variables on entry to this block */ - - regSet.ClearMaskVars(); - gcInfo.gcRegGCrefSetCur = RBM_NONE; - gcInfo.gcRegByrefSetCur = RBM_NONE; - - compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); - - genUpdateLife(block->bbLiveIn); - - // Even if liveness didn't change, we need to update the registers containing GC references. - // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't - // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change - // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). 
- - regMaskTP newLiveRegSet = RBM_NONE; - regMaskTP newRegGCrefSet = RBM_NONE; - regMaskTP newRegByrefSet = RBM_NONE; - VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); - while (iter.NextElem(compiler, &varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - if (varDsc->lvIsInReg()) - { - newLiveRegSet |= varDsc->lvRegMask(); - if (varDsc->lvType == TYP_REF) - { - newRegGCrefSet |= varDsc->lvRegMask(); - } - else if (varDsc->lvType == TYP_BYREF) - { - newRegByrefSet |= varDsc->lvRegMask(); - } - } - else if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF) - { - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); - } - } - - regSet.rsMaskVars = newLiveRegSet; - gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true)); - gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true)); - - /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to - represent the exception object (TYP_REF). - We mark REG_EXCEPTION_OBJECT as holding a GC object on entry - to the block, it will be the first thing evaluated - (thanks to GTF_ORDER_SIDEEFF). - */ - - if (handlerGetsXcptnObj(block->bbCatchTyp)) - { - for (GenTree* node : LIR::AsRange(block)) - { - if (node->OperGet() == GT_CATCH_ARG) - { - gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); - break; - } - } - } - - /* Start a new code output block */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#if FEATURE_EH_FUNCLETS -#if defined(_TARGET_ARM_) - // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region, - // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that - // calls the funclet during non-exceptional control flow. - if (block->bbFlags & BBF_FINALLY_TARGET) - { - assert(block->bbFlags & BBF_JMP_TARGET); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum); - } -#endif - // Create a label that we'll use for computing the start of an EH region, if this block is - // at the beginning of such a region. If we used the existing bbEmitCookie as is for - // determining the EH regions, then this NOP would end up outside of the region, if this - // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP - // would be executed, which we would prefer not to do. 
- - block->bbUnwindNopEmitCookie = - getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur); - - instGen(INS_nop); - } -#endif // defined(_TARGET_ARM_) - - genUpdateCurrentFunclet(block); -#endif // FEATURE_EH_FUNCLETS - -#ifdef _TARGET_XARCH_ - if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) - { - getEmitter()->emitLoopAlign(); - } -#endif - -#ifdef DEBUG - if (compiler->opts.dspCode) - printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); -#endif - - block->bbEmitCookie = NULL; - - if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) - { - /* Mark a label and update the current set of live GC refs */ - - block->bbEmitCookie = - getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, - /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET); - } - - if (block == compiler->fgFirstColdBlock) - { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nThis is the start of the cold region of the method\n"); - } -#endif - // We should never have a block that falls through into the Cold section - noway_assert(!lblk->bbFallsThrough()); - - // We require the block that starts the Cold section to have a label - noway_assert(block->bbEmitCookie); - getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); - } - - /* Both stacks are always empty on entry to a basic block */ - - genStackLevel = 0; - -#if !FEATURE_FIXED_OUT_ARGS - /* Check for inserted throw blocks and adjust genStackLevel */ - - if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) - { - noway_assert(block->bbFlags & BBF_JMP_TARGET); - - genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int); - - if (genStackLevel) - { - NYI("Need emitMarkStackLvl()"); - } - } -#endif // !FEATURE_FIXED_OUT_ARGS - - savedStkLvl = genStackLevel; - - /* Tell everyone which basic block we're working on */ - - compiler->compCurBB = block; - -#ifdef DEBUGGING_SUPPORT - siBeginBlock(block); - - // BBF_INTERNAL blocks don't correspond to any single IL instruction. - if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB) - genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); - - bool firstMapping = true; -#endif // DEBUGGING_SUPPORT - - /*--------------------------------------------------------------------- - * - * Generate code for each statement-tree in the block - * - */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#if FEATURE_EH_FUNCLETS - if (block->bbFlags & BBF_FUNCLET_BEG) - { - genReserveFuncletProlog(block); - } -#endif // FEATURE_EH_FUNCLETS - - // Clear compCurStmt and compCurLifeTree. - compiler->compCurStmt = nullptr; - compiler->compCurLifeTree = nullptr; - -#ifdef DEBUG - bool pastProfileUpdate = false; -#endif - -// Traverse the block in linear order, generating code for each node as we -// as we encounter it. -#ifdef DEBUGGING_SUPPORT - IL_OFFSETX currentILOffset = BAD_IL_OFFSET; -#endif - for (GenTree* node : LIR::AsRange(block)) - { -#ifdef DEBUGGING_SUPPORT - // Do we have a new IL offset? 
- if (node->OperGet() == GT_IL_OFFSET) - { - genEnsureCodeEmitted(currentILOffset); - - currentILOffset = node->gtStmt.gtStmtILoffsx; - - genIPmappingAdd(currentILOffset, firstMapping); - firstMapping = false; - } -#endif // DEBUGGING_SUPPORT - -#ifdef DEBUG - if (node->OperGet() == GT_IL_OFFSET) - { - noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || - node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); - - if (compiler->opts.dspCode && compiler->opts.dspInstrs && - node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) - { - while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs) - { - genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); - } - } - } -#endif // DEBUG - - genCodeForTreeNode(node); - if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse) - { - genConsumeReg(node); - } - -#ifdef DEBUG - regSet.rsSpillChk(); - - assert((node->gtFlags & GTF_SPILL) == 0); - - /* Make sure we didn't bungle pointer register tracking */ - - regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur); - regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; - - // If return is a GC-type, clear it. Note that if a common - // epilog is generated (genReturnBB) it has a void return - // even though we might return a ref. We can't use the compRetType - // as the determiner because something we are tracking as a byref - // might be used as a return value of a int function (which is legal) - if (node->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) || - (node->gtOp.gtOp1 != 0 && varTypeIsGC(node->gtOp.gtOp1->TypeGet())))) - { - nonVarPtrRegs &= ~RBM_INTRET; - } - - // When profiling, the first few nodes in a catch block will be an update of - // the profile count (does not interfere with the exception object). - if (((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) != 0) && handlerGetsXcptnObj(block->bbCatchTyp)) - { - pastProfileUpdate = pastProfileUpdate || node->OperGet() == GT_CATCH_ARG; - if (!pastProfileUpdate) - { - nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT; - } - } - - if (nonVarPtrRegs) - { - printf("Regset after node="); - Compiler::printTreeID(node); - printf(" BB%02u gcr=", block->bbNum); - printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - printf(", byr="); - printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - printf(", regVars="); - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf("\n"); - } - - noway_assert(nonVarPtrRegs == 0); -#endif // DEBUG - } - -#ifdef DEBUGGING_SUPPORT - // It is possible to reach the end of the block without generating code for the current IL offset. - // For example, if the following IR ends the current block, no code will have been generated for - // offset 21: - // - // ( 0, 0) [000040] ------------ il_offset void IL offset: 21 - // - // N001 ( 0, 0) [000039] ------------ nop void - // - // This can lead to problems when debugging the generated code. To prevent these issues, make sure - // we've generated code for the last IL offset we saw in the block. - genEnsureCodeEmitted(currentILOffset); - - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) - { - siEndBlock(block); - - /* Is this the last block, and are there any open scopes left ? 
*/ - - bool isLastBlockProcessed = (block->bbNext == NULL); - if (block->isBBCallAlwaysPair()) - { - isLastBlockProcessed = (block->bbNext->bbNext == NULL); - } - - if (isLastBlockProcessed && siOpenScopeList.scNext) - { - /* This assert no longer holds, because we may insert a throw - block to demarcate the end of a try or finally region when they - are at the end of the method. It would be nice if we could fix - our code so that this throw block will no longer be necessary. */ - - // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); - - siCloseAllOpenScopes(); - } - } - -#endif // DEBUGGING_SUPPORT - - genStackLevel -= savedStkLvl; - -#ifdef DEBUG - // compCurLife should be equal to the liveOut set, except that we don't keep - // it up to date for vars that are not register candidates - // (it would be nice to have a xor set function) - - VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); - VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); - VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); - while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; - assert(!varDsc->lvIsRegCandidate()); - } -#endif - - /* Both stacks should always be empty on exit from a basic block */ - - noway_assert(genStackLevel == 0); - -#ifdef _TARGET_AMD64_ - // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several - // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack - // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. - // The document "X64 and ARM ABIs.docx" has more details. The situations: - // 1. If the call instruction is in a different EH region as the instruction that follows it. - // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might - // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters - // here.) - // We handle case #1 here, and case #2 in the emitter. - if (getEmitter()->emitIsLastInsCall()) - { - // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? - // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, - // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions - // generated before the OS epilog starts, such as a GS cookie check. - if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) - { - // We only need the NOP if we're not going to generate any more code as part of the block end. - - switch (block->bbJumpKind) - { - case BBJ_ALWAYS: - case BBJ_THROW: - case BBJ_CALLFINALLY: - case BBJ_EHCATCHRET: - // We're going to generate more code below anyway, so no need for the NOP. - - case BBJ_RETURN: - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - // These are the "epilog follows" case, handled in the emitter. - - break; - - case BBJ_NONE: - if (block->bbNext == nullptr) - { - // Call immediately before the end of the code; we should never get here . 
- instGen(INS_BREAKPOINT); // This should never get executed - } - else - { - // We need the NOP - instGen(INS_nop); - } - break; - - case BBJ_COND: - case BBJ_SWITCH: - // These can't have a call as the last instruction! - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; - } - } - } -#endif //_TARGET_AMD64_ - - /* Do we need to generate a jump or return? */ - - switch (block->bbJumpKind) - { - case BBJ_ALWAYS: - inst_JMP(EJ_jmp, block->bbJumpDest); - break; - - case BBJ_RETURN: - genExitCode(block); - break; - - case BBJ_THROW: - // If we have a throw at the end of a function or funclet, we need to emit another instruction - // afterwards to help the OS unwinder determine the correct context during unwind. - // We insert an unexecuted breakpoint instruction in several situations - // following a throw instruction: - // 1. If the throw is the last instruction of the function or funclet. This helps - // the OS unwinder determine the correct context during an unwind from the - // thrown exception. - // 2. If this is this is the last block of the hot section. - // 3. If the subsequent block is a special throw block. - // 4. On AMD64, if the next block is in a different EH region. - if ((block->bbNext == NULL) -#if FEATURE_EH_FUNCLETS - || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) -#endif // FEATURE_EH_FUNCLETS -#ifdef _TARGET_AMD64_ - || !BasicBlock::sameEHRegion(block, block->bbNext) -#endif // _TARGET_AMD64_ - || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || - block->bbNext == compiler->fgFirstColdBlock) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - - break; - - case BBJ_CALLFINALLY: - - // Now set REG_LR to the address of where the finally funclet should - // return to directly. - - BasicBlock* bbFinallyRet; - bbFinallyRet = NULL; - - // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where - // we would have otherwise created retless calls. - assert(block->isBBCallAlwaysPair()); - - assert(block->bbNext != NULL); - assert(block->bbNext->bbJumpKind == BBJ_ALWAYS); - assert(block->bbNext->bbJumpDest != NULL); - assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET); - - bbFinallyRet = block->bbNext->bbJumpDest; - bbFinallyRet->bbFlags |= BBF_JMP_TARGET; - -#if 0 - // TODO-ARM-CQ: - // We don't know the address of finally funclet yet. But adr requires the offset - // to finally funclet from current IP is within 4095 bytes. So this code is disabled - // for now. - getEmitter()->emitIns_J_R (INS_adr, - EA_4BYTE, - bbFinallyRet, - REG_LR); -#else // !0 - // Load the address where the finally funclet should return into LR. - // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do - // the return. - getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); -#endif // !0 - - // Jump to the finally BB - inst_JMP(EJ_jmp, block->bbJumpDest); - - // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the - // jump target using bbJumpDest - that is already used to point - // to the finally block. So just skip past the BBJ_ALWAYS unless the - // block is RETLESS. 
- if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - - lblk = block; - block = block->bbNext; - } - break; - -#ifdef _TARGET_ARM_ - - case BBJ_EHCATCHRET: - // set r0 to the address the VM should return to after the catch - getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); - - __fallthrough; - - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - genReserveFuncletEpilog(block); - break; - -#elif defined(_TARGET_AMD64_) - - case BBJ_EHCATCHRET: - // Set EAX to the address the VM should return to after the catch. - // Generate a RIP-relative - // lea reg, [rip + disp32] ; the RIP is implicit - // which will be position-indepenent. - // TODO-ARM-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC). - getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); - __fallthrough; - - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - genReserveFuncletEpilog(block); - break; - -#endif // _TARGET_AMD64_ - - case BBJ_NONE: - case BBJ_COND: - case BBJ_SWITCH: - break; - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; - } - -#ifdef DEBUG - compiler->compCurBB = 0; -#endif - - } //------------------ END-FOR each block of the method ------------------- - - /* Nothing is live at this point */ - genUpdateLife(VarSetOps::MakeEmpty(compiler)); - - /* Finalize the spill tracking logic */ - - regSet.rsSpillEnd(); - - /* Finalize the temp tracking logic */ - - compiler->tmpEnd(); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\n# "); - printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate); - printf("%s\n", compiler->info.compFullName); - } -#endif + NYI("ARM genCallFinally"); + return block; } -// return the child that has the same reg as the dst (if any) -// other child returned (out param) in 'other' -GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/) -{ - if (tree->gtRegNum == REG_NA) - { - other = nullptr; - return NULL; - } +// move an immediate value into an integer register - GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal(); - GenTreePtr op2 = tree->gtOp.gtOp2->gtEffectiveVal(); - if (op1->gtRegNum == tree->gtRegNum) - { - other = op2; - return op1; - } - if (op2->gtRegNum == tree->gtRegNum) - { - other = op1; - return op2; - } - else - { - other = nullptr; - return NULL; - } +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + NYI("ARM genEHCatchRet"); } -// move an immediate value into an integer register - void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) { // reg cannot be a FP register @@ -902,16 +93,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, } else { -#ifdef _TARGET_AMD64_ - if (AddrShouldUsePCRel(imm)) - { - getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm); - } - else -#endif // _TARGET_AMD64_ - { - getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); - } + getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); } regTracker.rsTrackRegIntCns(reg, imm); } @@ -1423,6 +605,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_LIST: + case GT_FIELD_LIST: case GT_ARGPLACE: // Nothing to do break; @@ -1479,7 +662,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LOCKADD: case GT_XCHG: case GT_XADD: - genLockedInstructions(treeNode); + 
genLockedInstructions(treeNode->AsOp()); break; case GT_CMPXCHG: @@ -1554,7 +737,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { #ifdef DEBUG char message[256]; - sprintf(message, "NYI: Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); + _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n", + GenTree::NodeName(treeNode->OperGet())); notYetImplemented(message, __FILE__, __LINE__); #else NYI("unimplemented node"); @@ -1566,7 +750,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD -void CodeGen::genLockedInstructions(GenTree* treeNode) +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { NYI("genLockedInstructions"); } @@ -1697,188 +881,9 @@ void CodeGen::genCodeForShift(GenTreePtr tree) NYI("genCodeForShift"); } -void CodeGen::genUnspillRegIfNeeded(GenTree* tree) -{ - regNumber dstReg = tree->gtRegNum; - - GenTree* unspillTree = tree; - if (tree->gtOper == GT_RELOAD) - { - unspillTree = tree->gtOp.gtOp1; - } - if (unspillTree->gtFlags & GTF_SPILLED) - { - if (genIsRegCandidateLocal(unspillTree)) - { - // Reset spilled flag, since we are going to load a local variable from its home location. - unspillTree->gtFlags &= ~GTF_SPILLED; - - // Load local variable from its home location. - inst_RV_TT(ins_Load(unspillTree->gtType), dstReg, unspillTree); - - unspillTree->SetInReg(); - - GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - - // TODO-Review: We would like to call: - // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); - // instead of the following code, but this ends up hitting this assert: - // assert((regSet.rsMaskVars & regMask) == 0); - // due to issues with LSRA resolution moves. - // So, just force it for now. This probably indicates a condition that creates a GC hole! - // - // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, - // because the variable is not really going live or dead, but that method is somewhat poorly - // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. - // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp. 
- - genUpdateVarReg(varDsc, tree); -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); - } -#endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); - varDsc->PrintVarReg(); - printf(" is becoming live "); - Compiler::printTreeID(unspillTree); - printf("\n"); - } -#endif // DEBUG - - regSet.AddMaskVars(genGetRegMask(varDsc)); - } - else - { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); - compiler->tmpRlsTemp(t); - getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg, - t->tdTempNum(), 0); - - unspillTree->SetInReg(); - } - - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); - } -} - -// do liveness update for a subnode that is being consumed by codegen -regNumber CodeGen::genConsumeReg(GenTree* tree) -{ - genUnspillRegIfNeeded(tree); - - // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar - genUpdateLife(tree); - assert(tree->gtRegNum != REG_NA); - - // there are three cases where consuming a reg means clearing the bit in the live mask - // 1. it was not produced by a local - // 2. it was produced by a local that is going dead - // 3. it was produced by a local that does not live in that reg (like one allocated on the stack) - - if (genIsRegCandidateLocal(tree)) - { - GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - - if (varDsc->lvRegNum == tree->gtRegNum && ((tree->gtFlags & GTF_VAR_DEATH) != 0)) - { - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - else if (!varDsc->lvLRACandidate) - { - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - } - else - { - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - - return tree->gtRegNum; -} - -// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). -void CodeGen::genConsumeAddress(GenTree* addr) -{ - if (addr->OperGet() == GT_LEA) - { - genConsumeAddrMode(addr->AsAddrMode()); - } - else - { - assert(!addr->isContained()); - genConsumeReg(addr); - } -} - -// do liveness update for a subnode that is being consumed by codegen -void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) +void CodeGen::genRegCopy(GenTree* treeNode) { - if (addr->Base()) - genConsumeReg(addr->Base()); - if (addr->Index()) - genConsumeReg(addr->Index()); -} - -// do liveness update for register produced by the current node in codegen -void CodeGen::genProduceReg(GenTree* tree) -{ - if (tree->gtFlags & GTF_SPILL) - { - if (genIsRegCandidateLocal(tree)) - { - // Store local variable to its home location. - tree->gtFlags &= ~GTF_REG_VAL; - inst_TT_RV(ins_Store(tree->gtType), tree, tree->gtRegNum); - } - else - { - tree->SetInReg(); - regSet.rsSpillTree(tree->gtRegNum, tree); - tree->gtFlags |= GTF_SPILLED; - tree->gtFlags &= ~GTF_SPILL; - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - return; - } - } - - genUpdateLife(tree); - - // If we've produced a register, mark it as a pointer, as needed. - // Except in the case of a dead definition of a lclVar. 
- if (tree->gtHasReg() && (!tree->IsLocal() || (tree->gtFlags & GTF_VAR_DEATH) == 0)) - { - gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); - } - tree->SetInReg(); -} - -// transfer gc/byref status of src reg to dst reg -void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) -{ - regMaskTP srcMask = genRegMask(src); - regMaskTP dstMask = genRegMask(dst); - - if (gcInfo.gcRegGCrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetGCref(dstMask); - } - else if (gcInfo.gcRegByrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetByref(dstMask); - } - else - { - gcInfo.gcMarkRegSetNpt(dstMask); - } + NYI("genRegCopy"); } // Produce code for a GT_CALL node @@ -2050,57 +1055,6 @@ void CodeGen::genEmitHelperCall(unsigned helper, NYI("Helper call"); } -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/***************************************************************************** - * genSetScopeInfo - * - * Called for every scope info piece to record by the main genSetScopeInfo() - */ - -void CodeGen::genSetScopeInfo(unsigned which, - UNATIVE_OFFSET startOffs, - UNATIVE_OFFSET length, - unsigned varNum, - unsigned LVnum, - bool avail, - Compiler::siVarLoc& varLoc) -{ - /* We need to do some mapping while reporting back these variables */ - - unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); - noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); - - VarName name = nullptr; - -#ifdef DEBUG - - for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) - { - if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) - { - name = compiler->info.compVarScopes[scopeNum].vsdName; - } - } - - // Hang on to this compiler->info. - - TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; - - tlvi.tlviVarNum = ilVarNum; - tlvi.tlviLVnum = LVnum; - tlvi.tlviName = name; - tlvi.tlviStartPC = startOffs; - tlvi.tlviLength = length; - tlvi.tlviAvailable = avail; - tlvi.tlviVarLoc = varLoc; - -#endif // DEBUG - - compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); -} -#endif // DEBUGGING_SUPPORT - #endif // _TARGET_ARM_ #endif // !LEGACY_BACKEND diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index ca0df53a34..cc7c5dc524 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -747,7 +747,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| - * | PSP slot | // 8 bytes + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| @@ -773,7 +773,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| - * | PSP slot | // 8 bytes + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
* |-----------------------| @@ -801,7 +801,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| - * | PSP slot | // 8 bytes + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned * |-----------------------| @@ -883,7 +883,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * +=======================+ <---- Caller's SP * |Callee saved registers | // multiple of 8 bytes * |-----------------------| - * | PSP slot | // 8 bytes + * | PSP slot | // 8 bytes (omitted in CoreRT ABI) * |-----------------------| * | Saved FP, LR | // 16 bytes * |-----------------------| @@ -988,6 +988,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); + // If there is no PSPSym (CoreRT ABI), we are done. + if (compiler->lvaPSPSym == BAD_VAR_NUM) + { + return; + } + if (isFilter) { // This is the first block of a filter @@ -1134,8 +1140,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_LR) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); + unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); - unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES; + unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize; if (compiler->info.compIsVarArgs) { // For varargs we always save all of the integer register arguments @@ -1222,22 +1230,29 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2); - if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + if (compiler->lvaPSPSym != BAD_VAR_NUM) { - printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + if (CallerSP_to_PSP_slot_delta != + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging + { + printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); + } } } -#endif // DEBUG assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); - assert(compiler->lvaPSPSym != BAD_VAR_NUM); - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! + + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == + compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and + // funclet! 
+ } +#endif // DEBUG } /* @@ -1250,100 +1265,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -// Get the register assigned to the given node - -regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) -{ - return tree->gtRegNum; -} - -//------------------------------------------------------------------------ -// genSpillVar: Spill a local variable -// -// Arguments: -// tree - the lclVar node for the variable being spilled -// -// Return Value: -// None. -// -// Assumptions: -// The lclVar must be a register candidate (lvRegCandidate) - -void CodeGen::genSpillVar(GenTreePtr tree) -{ - unsigned varNum = tree->gtLclVarCommon.gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - assert(varDsc->lvIsRegCandidate()); - - // We don't actually need to spill if it is already living in memory - bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); - if (needsSpill) - { - var_types lclTyp = varDsc->TypeGet(); - if (varDsc->lvNormalizeOnStore()) - lclTyp = genActualType(lclTyp); - emitAttr size = emitTypeSize(lclTyp); - - bool restoreRegVar = false; - if (tree->gtOper == GT_REG_VAR) - { - tree->SetOper(GT_LCL_VAR); - restoreRegVar = true; - } - - // mask off the flag to generate the right spill code, then bring it back - tree->gtFlags &= ~GTF_REG_VAL; - - instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); - - assert(varDsc->lvRegNum == tree->gtRegNum); - inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); - - tree->gtFlags |= GTF_REG_VAL; - - if (restoreRegVar) - { - tree->SetOper(GT_REG_VAR); - } - - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); - gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); - - if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) - { -#ifdef DEBUG - if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); - } - else - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); - } -#endif - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - - tree->gtFlags &= ~GTF_SPILL; - varDsc->lvRegNum = REG_STK; - if (varTypeIsMultiReg(tree)) - { - varDsc->lvOtherReg = REG_STK; - } -} - -// inline -void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree) -{ - assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); - varDsc->lvRegNum = tree->gtRegNum; -} - -/*****************************************************************************/ -/*****************************************************************************/ - /***************************************************************************** * * Generate code that will set the given register to the integer constant. @@ -1405,702 +1326,79 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) genDefineTempLabel(gsCheckBlk); } -/***************************************************************************** - * - * Generate code for all the basic blocks in the function. 
- */ - -void CodeGen::genCodeForBBlist() +BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk) { - unsigned varNum; - LclVarDsc* varDsc; - - unsigned savedStkLvl; - -#ifdef DEBUG - genInterruptibleUsed = true; - - // You have to be careful if you create basic blocks from now on - compiler->fgSafeBasicBlockCreation = false; - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnCall) - { - compiler->opts.compStackCheckOnCall = false; - } - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnRet) - { - compiler->opts.compStackCheckOnRet = false; - } -#endif // DEBUG - - // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. - genPrepForEHCodegen(); - - assert(!compiler->fgFirstBBScratch || - compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. - - /* Initialize the spill tracking logic */ - - regSet.rsSpillBeg(); + // Generate a call to the finally, like this: + // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used + // bl finally-funclet + // b finally-return // Only for non-retless finally calls + // The 'b' can be a NOP if we're going to the next block. -#ifdef DEBUGGING_SUPPORT - /* Initialize the line# tracking logic */ - - if (compiler->opts.compScopeInfo) - { - siInit(); - } -#endif - - // The current implementation of switch tables requires the first block to have a label so it - // can generate offsets to the switch label targets. - // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this. - if (compiler->fgHasSwitch) + if (compiler->lvaPSPSym != BAD_VAR_NUM) { - compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); } - - genPendingCallLabel = nullptr; - - /* Initialize the pointer tracking code */ - - gcInfo.gcRegPtrSetInit(); - gcInfo.gcVarPtrSetInit(); - - /* If any arguments live in registers, mark those regs as such */ - - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + else { - /* Is this variable a parameter assigned to a register? */ - - if (!varDsc->lvIsParam || !varDsc->lvRegister) - continue; - - /* Is the argument live on entry to the method? */ - - if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) - continue; - - /* Is this a floating-point argument? */ - - if (varDsc->IsFloatRegType()) - continue; - - noway_assert(!varTypeIsFloating(varDsc->TypeGet())); - - /* Mark the register as holding the variable */ - - regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); + getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE); } + getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); - unsigned finallyNesting = 0; - - // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without - // allocation at the start of each basic block. 
- VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); - - /*------------------------------------------------------------------------- - * - * Walk the basic blocks and generate code for each one - * - */ - - BasicBlock* block; - BasicBlock* lblk; /* previous block */ - - for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) + if (block->bbFlags & BBF_RETLESS_CALL) { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\n=============== Generating "); - block->dspBlockHeader(compiler, true, true); - compiler->fgDispBBLiveness(block); - } -#endif // DEBUG - - /* Figure out which registers hold variables on entry to this block */ - - regSet.ClearMaskVars(); - gcInfo.gcRegGCrefSetCur = RBM_NONE; - gcInfo.gcRegByrefSetCur = RBM_NONE; - - compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); - - genUpdateLife(block->bbLiveIn); - - // Even if liveness didn't change, we need to update the registers containing GC references. - // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't - // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change - // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). - - regMaskTP newLiveRegSet = RBM_NONE; - regMaskTP newRegGCrefSet = RBM_NONE; - regMaskTP newRegByrefSet = RBM_NONE; -#ifdef DEBUG - VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); - VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); -#endif - VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); - while (iter.NextElem(compiler, &varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - if (varDsc->lvIsInReg()) - { - newLiveRegSet |= varDsc->lvRegMask(); - if (varDsc->lvType == TYP_REF) - { - newRegGCrefSet |= varDsc->lvRegMask(); - } - else if (varDsc->lvType == TYP_BYREF) - { - newRegByrefSet |= varDsc->lvRegMask(); - } -#ifdef DEBUG - if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) - { - VarSetOps::AddElemD(compiler, removedGCVars, varIndex); - } -#endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); - } - else if (compiler->lvaIsGCTracked(varDsc)) - { -#ifdef DEBUG - if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) - { - VarSetOps::AddElemD(compiler, addedGCVars, varIndex); - } -#endif // DEBUG - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); - } - } - - regSet.rsMaskVars = newLiveRegSet; - -#ifdef DEBUG - if (compiler->verbose) - { - if (!VarSetOps::IsEmpty(compiler, addedGCVars)) - { - printf("\t\t\t\t\t\t\tAdded GCVars: "); - dumpConvertedVarSet(compiler, addedGCVars); - printf("\n"); - } - if (!VarSetOps::IsEmpty(compiler, removedGCVars)) - { - printf("\t\t\t\t\t\t\tRemoved GCVars: "); - dumpConvertedVarSet(compiler, removedGCVars); - printf("\n"); - } - } -#endif // DEBUG - - gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true)); - gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true)); - - /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to - represent the exception object (TYP_REF). - We mark REG_EXCEPTION_OBJECT as holding a GC object on entry - to the block, it will be the first thing evaluated - (thanks to GTF_ORDER_SIDEEFF). 
- */ - - if (handlerGetsXcptnObj(block->bbCatchTyp)) - { - for (GenTree* node : LIR::AsRange(block)) - { - if (node->OperGet() == GT_CATCH_ARG) - { - gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); - break; - } - } - } - - /* Start a new code output block */ - - genUpdateCurrentFunclet(block); - -#ifdef _TARGET_XARCH_ - if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) - { - getEmitter()->emitLoopAlign(); - } -#endif - -#ifdef DEBUG - if (compiler->opts.dspCode) - printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); -#endif - - block->bbEmitCookie = NULL; - - if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) - { - /* Mark a label and update the current set of live GC refs */ - - block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, FALSE); - } - - if (block == compiler->fgFirstColdBlock) - { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nThis is the start of the cold region of the method\n"); - } -#endif - // We should never have a block that falls through into the Cold section - noway_assert(!lblk->bbFallsThrough()); - - // We require the block that starts the Cold section to have a label - noway_assert(block->bbEmitCookie); - getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); - } - - /* Both stacks are always empty on entry to a basic block */ - - genStackLevel = 0; - - savedStkLvl = genStackLevel; - - /* Tell everyone which basic block we're working on */ - - compiler->compCurBB = block; - -#ifdef DEBUGGING_SUPPORT - siBeginBlock(block); - - // BBF_INTERNAL blocks don't correspond to any single IL instruction. - if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && - !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to - // emit a NO_MAPPING entry, immediately after the prolog. - { - genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); - } - - bool firstMapping = true; -#endif // DEBUGGING_SUPPORT - - /*--------------------------------------------------------------------- - * - * Generate code for each statement-tree in the block - * - */ - - if (block->bbFlags & BBF_FUNCLET_BEG) - { - genReserveFuncletProlog(block); - } - - // Clear compCurStmt and compCurLifeTree. - compiler->compCurStmt = nullptr; - compiler->compCurLifeTree = nullptr; - - // Traverse the block in linear order, generating code for each node as we - // as we encounter it. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUGGING_SUPPORT - IL_OFFSETX currentILOffset = BAD_IL_OFFSET; -#endif - for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) - { -#ifdef DEBUGGING_SUPPORT - // Do we have a new IL offset? 
- if (node->OperGet() == GT_IL_OFFSET) - { - genEnsureCodeEmitted(currentILOffset); - currentILOffset = node->gtStmt.gtStmtILoffsx; - genIPmappingAdd(currentILOffset, firstMapping); - firstMapping = false; - } -#endif // DEBUGGING_SUPPORT - -#ifdef DEBUG - if (node->OperGet() == GT_IL_OFFSET) - { - noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || - node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); - - if (compiler->opts.dspCode && compiler->opts.dspInstrs && - node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) - { - while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs) - { - genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); - } - } - } -#endif // DEBUG - - genCodeForTreeNode(node); - if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse) - { - genConsumeReg(node); - } - } // end for each node in block - -#ifdef DEBUG - // The following set of register spill checks and GC pointer tracking checks used to be - // performed at statement boundaries. Now, with LIR, there are no statements, so they are - // performed at the end of each block. - // TODO: could these checks be performed more frequently? E.g., at each location where - // the register allocator says there are no live non-variable registers. Perhaps this could - // be done by (a) keeping a running count of live non-variable registers by using - // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively, - // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA - // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node, - // there will be no live non-variable registers. - - regSet.rsSpillChk(); - - /* Make sure we didn't bungle pointer register tracking */ - - regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; - regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; - - // If return is a GC-type, clear it. Note that if a common - // epilog is generated (genReturnBB) it has a void return - // even though we might return a ref. 
We can't use the compRetType - // as the determiner because something we are tracking as a byref - // might be used as a return value of a int function (which is legal) - GenTree* blockLastNode = block->lastNode(); - if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) && - (varTypeIsGC(compiler->info.compRetType) || - (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet())))) - { - nonVarPtrRegs &= ~RBM_INTRET; - } - - if (nonVarPtrRegs) - { - printf("Regset after BB%02u gcr=", block->bbNum); - printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - printf(", byr="); - printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - printf(", regVars="); - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf("\n"); - } - - noway_assert(nonVarPtrRegs == RBM_NONE); -#endif // DEBUG - -#if defined(DEBUG) && defined(_TARGET_ARM64_) - if (block->bbNext == nullptr) - { - // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block - // (it's as good as any, but better than the prolog, which can only be a single instruction - // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler - // thinks the instructions are the same as we do. - genArm64EmitterUnitTests(); - } -#endif // defined(DEBUG) && defined(_TARGET_ARM64_) - -#ifdef DEBUGGING_SUPPORT - // It is possible to reach the end of the block without generating code for the current IL offset. - // For example, if the following IR ends the current block, no code will have been generated for - // offset 21: - // - // ( 0, 0) [000040] ------------ il_offset void IL offset: 21 - // - // N001 ( 0, 0) [000039] ------------ nop void - // - // This can lead to problems when debugging the generated code. To prevent these issues, make sure - // we've generated code for the last IL offset we saw in the block. - genEnsureCodeEmitted(currentILOffset); - - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) - { - siEndBlock(block); - - /* Is this the last block, and are there any open scopes left ? */ + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. - bool isLastBlockProcessed = (block->bbNext == NULL); - if (block->isBBCallAlwaysPair()) - { - isLastBlockProcessed = (block->bbNext->bbNext == NULL); - } - - if (isLastBlockProcessed && siOpenScopeList.scNext) - { - /* This assert no longer holds, because we may insert a throw - block to demarcate the end of a try or finally region when they - are at the end of the method. It would be nice if we could fix - our code so that this throw block will no longer be necessary. 
*/ - - // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); - - siCloseAllOpenScopes(); - } - } - -#endif // DEBUGGING_SUPPORT - - genStackLevel -= savedStkLvl; - -#ifdef DEBUG - // compCurLife should be equal to the liveOut set, except that we don't keep - // it up to date for vars that are not register candidates - // (it would be nice to have a xor set function) - - VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); - VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); - VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); - while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { - unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; - assert(!varDsc->lvIsRegCandidate()); + instGen(INS_BREAKPOINT); // This should never get executed } -#endif - - /* Both stacks should always be empty on exit from a basic block */ - - noway_assert(genStackLevel == 0); + } + else + { + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + getEmitter()->emitDisableGC(); -#if 0 - // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several - // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack - // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. - // The document "X64 and ARM ABIs.docx" has more details. The situations: - // 1. If the call instruction is in a different EH region as the instruction that follows it. - // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might - // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.) - // We handle case #1 here, and case #2 in the emitter. - if (getEmitter()->emitIsLastInsCall()) + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) { - // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? - // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, - // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions - // generated before the OS epilog starts, such as a GS cookie check. - if ((block->bbNext == nullptr) || - !BasicBlock::sameEHRegion(block, block->bbNext)) - { - // We only need the NOP if we're not going to generate any more code as part of the block end. - - switch (block->bbJumpKind) - { - case BBJ_ALWAYS: - case BBJ_THROW: - case BBJ_CALLFINALLY: - case BBJ_EHCATCHRET: - // We're going to generate more code below anyway, so no need for the NOP. - - case BBJ_RETURN: - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - // These are the "epilog follows" case, handled in the emitter. - - break; - - case BBJ_NONE: - if (block->bbNext == nullptr) - { - // Call immediately before the end of the code; we should never get here . 
- instGen(INS_BREAKPOINT); // This should never get executed - } - else - { - // We need the NOP - instGen(INS_nop); - } - break; - - case BBJ_COND: - case BBJ_SWITCH: - // These can't have a call as the last instruction! - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; - } - } + // Fall-through. + // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); } -#endif // 0 - - /* Do we need to generate a jump or return? */ - - switch (block->bbJumpKind) + else { - case BBJ_ALWAYS: - inst_JMP(EJ_jmp, block->bbJumpDest); - break; - - case BBJ_RETURN: - genExitCode(block); - break; - - case BBJ_THROW: - // If we have a throw at the end of a function or funclet, we need to emit another instruction - // afterwards to help the OS unwinder determine the correct context during unwind. - // We insert an unexecuted breakpoint instruction in several situations - // following a throw instruction: - // 1. If the throw is the last instruction of the function or funclet. This helps - // the OS unwinder determine the correct context during an unwind from the - // thrown exception. - // 2. If this is this is the last block of the hot section. - // 3. If the subsequent block is a special throw block. - // 4. On AMD64, if the next block is in a different EH region. - if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || - !BasicBlock::sameEHRegion(block, block->bbNext) || - (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || - block->bbNext == compiler->fgFirstColdBlock) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - - break; - - case BBJ_CALLFINALLY: - - // Generate a call to the finally, like this: - // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym - // bl finally-funclet - // b finally-return // Only for non-retless finally calls - // The 'b' can be a NOP if we're going to the next block. - - getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); - getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest); - - if (block->bbFlags & BBF_RETLESS_CALL) - { - // We have a retless call, and the last instruction generated was a call. - // If the next block is in a different EH region (or is the end of the code - // block), then we need to generate a breakpoint here (since it will never - // get executed) to get proper unwind behavior. - - if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - } - else - { - // Because of the way the flowgraph is connected, the liveness info for this one instruction - // after the call is not (can not be) correct in cases where a variable has a last use in the - // handler. So turn off GC reporting for this single instruction. - getEmitter()->emitDisableGC(); - - // Now go to where the finally funclet needs to return to. - if (block->bbNext->bbJumpDest == block->bbNext->bbNext) - { - // Fall-through. - // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly - // to the next instruction? This would depend on stack walking from within the finally - // handler working without this instruction being in this special EH region. 
- instGen(INS_nop); - } - else - { - inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); - } - - getEmitter()->emitEnableGC(); - } - - // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the - // jump target using bbJumpDest - that is already used to point - // to the finally block. So just skip past the BBJ_ALWAYS unless the - // block is RETLESS. - if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - - lblk = block; - block = block->bbNext; - } - break; - - case BBJ_EHCATCHRET: - // For long address (default): `adrp + add` will be emitted. - // For short address (proven later): `adr` will be emitted. - getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); - - __fallthrough; - - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - genReserveFuncletEpilog(block); - break; - - case BBJ_NONE: - case BBJ_COND: - case BBJ_SWITCH: - break; - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); } -#ifdef DEBUG - compiler->compCurBB = 0; -#endif - - } //------------------ END-FOR each block of the method ------------------- - - /* Nothing is live at this point */ - genUpdateLife(VarSetOps::MakeEmpty(compiler)); - - /* Finalize the spill tracking logic */ - - regSet.rsSpillEnd(); - - /* Finalize the temp tracking logic */ - - compiler->tmpEnd(); + getEmitter()->emitEnableGC(); + } -#ifdef DEBUG - if (compiler->verbose) + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) { - printf("\n# "); - printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, - compiler->compSizeEstimate); - printf("%s\n", compiler->info.compFullName); + assert(block->isBBCallAlwaysPair()); + + lblk = block; + block = block->bbNext; } -#endif + return block; } -// return the child that has the same reg as the dst (if any) -// other child returned (out param) in 'other' -// TODO-Cleanup: move to CodeGenCommon.cpp -GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/) +void CodeGen::genEHCatchRet(BasicBlock* block) { - if (tree->gtRegNum == REG_NA) - { - other = nullptr; - return NULL; - } - - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtOp.gtOp2; - if (op1->gtRegNum == tree->gtRegNum) - { - other = op2; - return op1; - } - if (op2->gtRegNum == tree->gtRegNum) - { - other = op1; - return op2; - } - else - { - other = nullptr; - return NULL; - } + // For long address (default): `adrp + add` will be emitted. + // For short address (proven later): `adr` will be emitted. 
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); } // move an immediate value into an integer register @@ -3397,12 +2695,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_LIST: + case GT_FIELD_LIST: case GT_ARGPLACE: // Nothing to do break; case GT_PUTARG_STK: - genPutArgStk(treeNode); + genPutArgStk(treeNode->AsPutArgStk()); break; case GT_PUTARG_REG: @@ -3432,7 +2731,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LOCKADD: case GT_XCHG: case GT_XADD: - genLockedInstructions(treeNode); + genLockedInstructions(treeNode->AsOp()); break; case GT_MEMORYBARRIER: @@ -3597,7 +2896,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { #ifdef DEBUG char message[256]; - sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); + _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n", + GenTree::NodeName(treeNode->OperGet())); #endif assert(!"Unknown node in codegen"); } @@ -3998,9 +3298,11 @@ BAILOUT: if (endLabel != nullptr) genDefineTempLabel(endLabel); - // Write the lvaShadowSPfirst stack frame slot - noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); - getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0); + // Write the lvaLocAllocSPvar stack frame slot + if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0); + } #if STACK_PROBES if (compiler->opts.compNeedStackProbes) @@ -4034,6 +3336,10 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) unsigned size = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } assert(!dstAddr->isContained()); assert(!initVal->isContained()); @@ -4043,8 +3349,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) emitter *emit = getEmitter(); - genConsumeReg(initVal); - genConsumeReg(dstAddr); + genConsumeOperands(initBlkNode); // If the initVal was moved, or spilled and reloaded to a different register, // get the original initVal from below the GT_RELOAD, but only after capturing the valReg, @@ -4066,27 +3371,25 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode) unsigned size = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(initBlkNode->gtRsvdRegs == RBM_ARG_2); - if (size == 0) - { - noway_assert(initBlkNode->gtOper == GT_DYN_BLK); - genConsumeRegAndCopy(initBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2); - } - else - { // TODO-ARM64-CQ: When initblk loop unrolling is implemented // put this assert back on. 
#if 0 - assert(size >= INITBLK_UNROLL_LIMIT); -#endif // 0 - genSetRegToIcon(REG_ARG_2, size); + if (size != 0) + { + assert(blockSize >= INITBLK_UNROLL_LIMIT); } - genConsumeRegAndCopy(initVal, REG_ARG_1); - genConsumeRegAndCopy(dstAddr, REG_ARG_0); +#endif // 0 + + genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } @@ -4238,29 +3541,38 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // str tempReg, [R14, #8] void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { - // Make sure we got the arguments of the cpobj operation in the right registers - GenTreePtr dstAddr = cpObjNode->Addr(); - GenTreePtr source = cpObjNode->Data(); - noway_assert(source->gtOper == GT_IND); - GenTreePtr srcAddr = source->gtGetOp1(); + GenTreePtr dstAddr = cpObjNode->Addr(); + GenTreePtr source = cpObjNode->Data(); + var_types srcAddrType = TYP_BYREF; + bool sourceIsLocal = false; + + assert(source->isContained()); + if (source->gtOper == GT_IND) + { + GenTree* srcAddr = source->gtGetOp1(); + assert(!srcAddr->isContained()); + srcAddrType = srcAddr->TypeGet(); + } + else + { + noway_assert(source->IsLocal()); + sourceIsLocal = true; + } bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG assert(!dstAddr->isContained()); - assert(!srcAddr->isContained()); // This GenTree node has data about GC pointers, this means we're dealing // with CpObj. assert(cpObjNode->gtGcPtrCount > 0); #endif // DEBUG - // Consume these registers. + // Consume the operands and get them into the right registers. // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). - genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF); - gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet()); - - genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF); + genConsumeBlockOp(cpObjNode, REG_WRITE_BARRIER_DST_BYREF, REG_WRITE_BARRIER_SRC_BYREF, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); // Temp register used to perform the sequence of loads and stores. @@ -4332,31 +3644,17 @@ void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode) // Make sure we got the arguments of the cpblk operation in the right registers unsigned blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Addr(); - GenTreePtr source = cpBlkNode->Data(); - noway_assert(source->gtOper == GT_IND); - GenTreePtr srcAddr = source->gtGetOp1(); - assert(!dstAddr->isContained()); - assert(!srcAddr->isContained()); - assert(cpBlkNode->gtRsvdRegs == RBM_ARG_2); - if (blockSize != 0) - { + genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2); + #if 0 // Enable this when we support cpblk loop unrolling. 
- - assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT); - -#endif // 0 - genSetRegToIcon(REG_ARG_2, blockSize); - } - else + if (blockSize != 0) { - noway_assert(cpBlkNode->gtOper == GT_DYN_BLK); - genConsumeRegAndCopy(cpBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2); + assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT); } - genConsumeRegAndCopy(srcAddr, REG_ARG_1); - genConsumeRegAndCopy(dstAddr, REG_ARG_0); +#endif // 0 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN); } @@ -4421,7 +3719,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD -void CodeGen::genLockedInstructions(GenTree* treeNode) +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { #if 0 GenTree* data = treeNode->gtOp.gtOp2; @@ -4839,154 +4137,6 @@ void CodeGen::genCodeForShift(GenTreePtr tree) genProduceReg(tree); } -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genUnspillRegIfNeeded(GenTree* tree) -{ - regNumber dstReg = tree->gtRegNum; - - GenTree* unspillTree = tree; - if (tree->gtOper == GT_RELOAD) - { - unspillTree = tree->gtOp.gtOp1; - } - - if (unspillTree->gtFlags & GTF_SPILLED) - { - if (genIsRegCandidateLocal(unspillTree)) - { - // Reset spilled flag, since we are going to load a local variable from its home location. - unspillTree->gtFlags &= ~GTF_SPILLED; - - GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - - var_types targetType = unspillTree->gtType; - instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)); - emitAttr attr = emitTypeSize(targetType); - emitter* emit = getEmitter(); - - // Fixes Issue #3326 - attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); - - // Load local variable from its home location. - inst_RV_TT(ins, dstReg, unspillTree, 0, attr); - - unspillTree->SetInReg(); - - // TODO-Review: We would like to call: - // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); - // instead of the following code, but this ends up hitting this assert: - // assert((regSet.rsMaskVars & regMask) == 0); - // due to issues with LSRA resolution moves. - // So, just force it for now. This probably indicates a condition that creates a GC hole! - // - // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, - // because the variable is not really going live or dead, but that method is somewhat poorly - // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. - // This code exists in other CodeGen*.cpp files. - - // Don't update the variable's location if we are just re-spilling it again. 
- - if ((unspillTree->gtFlags & GTF_SPILL) == 0) - { - genUpdateVarReg(varDsc, tree); -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); - } -#endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); - varDsc->PrintVarReg(); - printf(" is becoming live "); - compiler->printTreeID(unspillTree); - printf("\n"); - } -#endif // DEBUG - - regSet.AddMaskVars(genGetRegMask(varDsc)); - } - - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); - } - else if (unspillTree->IsMultiRegCall()) - { - GenTreeCall* call = unspillTree->AsCall(); - ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = pRetTypeDesc->GetReturnRegCount(); - GenTreeCopyOrReload* reloadTree = nullptr; - if (tree->OperGet() == GT_RELOAD) - { - reloadTree = tree->AsCopyOrReload(); - } - - // In case of multi-reg call node, GTF_SPILLED flag on it indicates that - // one or more of its result regs are spilled. Call node needs to be - // queried to know which specific result regs to be unspilled. - for (unsigned i = 0; i < regCount; ++i) - { - unsigned flags = call->GetRegSpillFlagByIdx(i); - if ((flags & GTF_SPILLED) != 0) - { - var_types dstType = pRetTypeDesc->GetReturnRegType(i); - regNumber unspillTreeReg = call->GetRegNumByIdx(i); - - if (reloadTree != nullptr) - { - dstReg = reloadTree->GetRegNumByIdx(i); - if (dstReg == REG_NA) - { - dstReg = unspillTreeReg; - } - } - else - { - dstReg = unspillTreeReg; - } - - TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i); - getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(), - 0); - compiler->tmpRlsTemp(t); - gcInfo.gcMarkRegPtrVal(dstReg, dstType); - } - } - - unspillTree->gtFlags &= ~GTF_SPILLED; - unspillTree->SetInReg(); - } - else - { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); - getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg, - t->tdTempNum(), 0); - compiler->tmpRlsTemp(t); - - unspillTree->gtFlags &= ~GTF_SPILLED; - unspillTree->SetInReg(); - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); - } - } -} - -// Do Liveness update for a subnodes that is being consumed by codegen -// including the logic for reload in case is needed and also takes care -// of locating the value on the desired register. -void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg) -{ - regNumber treeReg = genConsumeReg(tree); - if (treeReg != needReg) - { - var_types targetType = tree->TypeGet(); - inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType); - } -} - void CodeGen::genRegCopy(GenTree* treeNode) { assert(treeNode->OperGet() == GT_COPY); @@ -5049,261 +4199,6 @@ void CodeGen::genRegCopy(GenTree* treeNode) genProduceReg(treeNode); } -// Do liveness update for a subnode that is being consumed by codegen. -// TODO-Cleanup: move to CodeGenCommon.cpp -regNumber CodeGen::genConsumeReg(GenTree* tree) -{ - if (tree->OperGet() == GT_COPY) - { - genRegCopy(tree); - } - // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it - // interferes with one of the other sources (or the target, if it's a "delayed use" register)). 
- // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and - // always using GT_COPY to make the lclVar location explicit. - // Note that we have to do this before calling genUpdateLife because otherwise if we spill it - // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds - // the lclVar (normally when a lclVar is spilled it is then used from its former register - // location, which matches the gtRegNum on the node). - // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded - // because if it's on the stack it will always get reloaded into tree->gtRegNum). - if (genIsRegCandidateLocal(tree)) - { - GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum)) - { - inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum); - } - } - - genUnspillRegIfNeeded(tree); - - // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar - genUpdateLife(tree); - assert(tree->gtRegNum != REG_NA); - - // there are three cases where consuming a reg means clearing the bit in the live mask - // 1. it was not produced by a local - // 2. it was produced by a local that is going dead - // 3. it was produced by a local that does not live in that reg (like one allocated on the stack) - - if (genIsRegCandidateLocal(tree)) - { - GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - assert(varDsc->lvLRACandidate); - - if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum)); - } - else if (varDsc->lvRegNum == REG_STK) - { - // We have loaded this into a register only temporarily - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - } - else - { - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - - return tree->gtRegNum; -} - -// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genConsumeAddress(GenTree* addr) -{ - if (addr->OperGet() == GT_LEA) - { - genConsumeAddrMode(addr->AsAddrMode()); - } - else if (!addr->isContained()) - { - genConsumeReg(addr); - } -} - -// do liveness update for a subnode that is being consumed by codegen -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) -{ - if (addr->Base()) - genConsumeReg(addr->Base()); - if (addr->Index()) - genConsumeReg(addr->Index()); -} - -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genConsumeRegs(GenTree* tree) -{ - if (tree->isContained()) - { - if (tree->isIndir()) - { - genConsumeAddress(tree->AsIndir()->Addr()); - } - else if (tree->OperGet() == GT_AND) - { - // This is the special contained GT_AND that we created in Lowering::LowerCmp() - // Now we need to consume the operands of the GT_AND node. - genConsumeOperands(tree->AsOp()); - } - else - { - assert(tree->OperIsLeaf()); - } - } - else - { - genConsumeReg(tree); - } -} - -//------------------------------------------------------------------------ -// genConsumeOperands: Do liveness update for the operands of a unary or binary tree -// -// Arguments: -// tree - the GenTreeOp whose operands will have their liveness updated. -// -// Return Value: -// None. 
-// -// Notes: -// Note that this logic is localized here because we must do the liveness update in -// the correct execution order. This is important because we may have two operands -// that involve the same lclVar, and if one is marked "lastUse" we must handle it -// after the first. -// TODO-Cleanup: move to CodeGenCommon.cpp - -void CodeGen::genConsumeOperands(GenTreeOp* tree) -{ - GenTree* firstOp = tree->gtOp1; - GenTree* secondOp = tree->gtOp2; - if ((tree->gtFlags & GTF_REVERSE_OPS) != 0) - { - assert(secondOp != nullptr); - firstOp = secondOp; - secondOp = tree->gtOp1; - } - if (firstOp != nullptr) - { - genConsumeRegs(firstOp); - } - if (secondOp != nullptr) - { - genConsumeRegs(secondOp); - } -} - -// do liveness update for register produced by the current node in codegen -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genProduceReg(GenTree* tree) -{ - if (tree->gtFlags & GTF_SPILL) - { - if (genIsRegCandidateLocal(tree)) - { - // Store local variable to its home location. - tree->gtFlags &= ~GTF_REG_VAL; - inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree, - tree->gtRegNum); - } - else - { - tree->SetInReg(); - regSet.rsSpillTree(tree->gtRegNum, tree); - tree->gtFlags |= GTF_SPILLED; - tree->gtFlags &= ~GTF_SPILL; - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - return; - } - } - - genUpdateLife(tree); - - // If we've produced a register, mark it as a pointer, as needed. - if (tree->gtHasReg()) - { - // We only mark the register in the following cases: - // 1. It is not a register candidate local. In this case, we're producing a - // register from a local, but the local is not a register candidate. Thus, - // we must be loading it as a temp register, and any "last use" flag on - // the register wouldn't be relevant. - // 2. The register candidate local is going dead. There's no point to mark - // the register as live, with a GC pointer, if the variable is dead. 
- if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) - { - gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); - } - } - tree->SetInReg(); -} - -// transfer gc/byref status of src reg to dst reg -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) -{ - regMaskTP srcMask = genRegMask(src); - regMaskTP dstMask = genRegMask(dst); - - if (gcInfo.gcRegGCrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetGCref(dstMask); - } - else if (gcInfo.gcRegByrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetByref(dstMask); - } - else - { - gcInfo.gcMarkRegSetNpt(dstMask); - } -} - -// generates an ip-relative call or indirect call via reg ('call reg') -// pass in 'addr' for a relative call or 'base' for a indirect register call -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr, - emitAttr retSize, - emitAttr secondRetSize, - IL_OFFSETX ilOffset, - regNumber base, - bool isJump, - bool isNoGC) -{ - - getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, 0, - retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump, - emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd))); -} - -// generates an indirect call via addressing mode (call []) given an indir node -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir, - emitAttr retSize, - emitAttr secondRetSize, - IL_OFFSETX ilOffset) -{ - genConsumeAddress(indir->Addr()); - - getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, 0, - retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, ilOffset, indir->Base() ? indir->Base()->gtRegNum : REG_NA, - indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset()); -} - // Produce code for a GT_CALL node void CodeGen::genCallInstruction(GenTreePtr node) { @@ -5321,7 +4216,7 @@ void CodeGen::genCallInstruction(GenTreePtr node) // Consume all the arg regs for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); @@ -5332,7 +4227,7 @@ void CodeGen::genCallInstruction(GenTreePtr node) continue; // Deal with multi register passed struct args. - if (argNode->OperGet() == GT_LIST) + if (argNode->OperGet() == GT_FIELD_LIST) { GenTreeArgList* argListPtr = argNode->AsArgList(); unsigned iterationNum = 0; @@ -5457,7 +4352,6 @@ void CodeGen::genCallInstruction(GenTreePtr node) } } -#ifdef DEBUGGING_SUPPORT // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. 
// We don't want tail call helper calls that were converted from normal calls to get a record, @@ -5466,7 +4360,6 @@ void CodeGen::genCallInstruction(GenTreePtr node) { (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); } -#endif // DEBUGGING_SUPPORT if (target != nullptr) { @@ -6703,7 +5596,7 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode) // Return value: // None // -void CodeGen::genPutArgStk(GenTreePtr treeNode) +void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) { assert(treeNode->OperGet() == GT_PUTARG_STK); var_types targetType = treeNode->TypeGet(); @@ -6759,7 +5652,7 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) varNumOut = compiler->lvaOutgoingArgSpaceVar; argOffsetMax = compiler->lvaOutgoingArgSpaceSize; } - bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_LIST); + bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST); if (!isStruct) // a normal non-Struct argument { @@ -6785,24 +5678,24 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) { assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64 - if (source->OperGet() == GT_LIST) + if (source->OperGet() == GT_FIELD_LIST) { // Deal with the multi register passed struct args. - GenTreeArgList* argListPtr = source->AsArgList(); + GenTreeFieldList* fieldListPtr = source->AsFieldList(); - // Evaluate each of the GT_LIST items into their register + // Evaluate each of the GT_FIELD_LIST items into their register // and store their register into the outgoing argument area - for (; argListPtr != nullptr; argListPtr = argListPtr->Rest()) + for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest()) { - GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1; + GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1; genConsumeReg(nextArgNode); regNumber reg = nextArgNode->gtRegNum; var_types type = nextArgNode->TypeGet(); emitAttr attr = emitTypeSize(type); - // Emit store instructions to store the registers produced by the GT_LIST into the outgoing argument - // area + // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing + // argument area emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut); argOffsetOut += EA_SIZE_IN_BYTES(attr); assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area @@ -7159,7 +6052,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // Now we can actually use those slot ID's to declare live ranges. 
gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); -#if defined(DEBUGGING_SUPPORT) if (compiler->opts.compDbgEnC) { // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) @@ -7183,7 +6075,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // frame gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); } -#endif gcInfoEncoder->Build(); @@ -7249,58 +6140,6 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regTracker.rsTrashRegsForGCInterruptability(); } -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/***************************************************************************** - * genSetScopeInfo - * - * Called for every scope info piece to record by the main genSetScopeInfo() - */ - -// TODO-Cleanup: move to CodeGenCommon.cpp -void CodeGen::genSetScopeInfo(unsigned which, - UNATIVE_OFFSET startOffs, - UNATIVE_OFFSET length, - unsigned varNum, - unsigned LVnum, - bool avail, - Compiler::siVarLoc& varLoc) -{ - /* We need to do some mapping while reporting back these variables */ - - unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); - noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); - - VarName name = nullptr; - -#ifdef DEBUG - - for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) - { - if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) - { - name = compiler->info.compVarScopes[scopeNum].vsdName; - } - } - - // Hang on to this compiler->info. - - TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; - - tlvi.tlviVarNum = ilVarNum; - tlvi.tlviLVnum = LVnum; - tlvi.tlviName = name; - tlvi.tlviStartPC = startOffs; - tlvi.tlviLength = length; - tlvi.tlviAvailable = avail; - tlvi.tlviVarLoc = varLoc; - -#endif // DEBUG - - compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); -} -#endif // DEBUGGING_SUPPORT - /***************************************************************************** * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h index 81b7b34194..3a88c83915 100644 --- a/src/jit/codegenclassic.h +++ b/src/jit/codegenclassic.h @@ -63,10 +63,6 @@ void genPInvokeCallEpilog(LclVarDsc* varDsc, regMaskTP retVal); regNumber genLclHeap(GenTreePtr size); -void genSinglePush(); - -void genSinglePop(); - void genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet); bool genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum); @@ -287,9 +283,6 @@ void genCodeForJumpTable(GenTreePtr tree); void genCodeForSwitchTable(GenTreePtr tree); void genCodeForSwitch(GenTreePtr tree); -regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs); -void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs); - size_t genPushArgList(GenTreePtr call); #ifdef _TARGET_ARM_ diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 2710447ade..240911523f 100755..100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -103,6 +103,10 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) u8ToDblBitmask = nullptr; #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87 +#if defined(FEATURE_PUT_STRUCT_ARG_STK) && 
!defined(_TARGET_X86_) + m_stkArgVarNum = BAD_VAR_NUM; +#endif + regTracker.rsTrackInit(compiler, ®Set); gcInfo.regSet = ®Set; m_cgEmitter = new (compiler->getAllocator()) emitter(); @@ -163,12 +167,10 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) genFlagsEqualToNone(); #endif // LEGACY_BACKEND -#ifdef DEBUGGING_SUPPORT // Initialize the IP-mapping logic. compiler->genIPmappingList = nullptr; compiler->genIPmappingLast = nullptr; compiler->genCallSite2ILOffsetMap = nullptr; -#endif /* Assume that we not fully interruptible */ @@ -359,7 +361,7 @@ void CodeGen::genPrepForCompiler() { VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex); } - else if (compiler->lvaIsGCTracked(varDsc) && (!varDsc->lvIsParam || varDsc->lvIsRegArg)) + else if (compiler->lvaIsGCTracked(varDsc)) { VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex); } @@ -646,23 +648,32 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH; #elif defined(_TARGET_ARM64_) return RBM_CALLEE_TRASH_NOGC; +#elif defined(_TARGET_X86_) + return RBM_ESI | RBM_EDI | RBM_ECX; #else NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch"); return RBM_CALLEE_TRASH; #endif case CORINFO_HELP_PROF_FCN_ENTER: -#ifdef _TARGET_AMD64_ +#ifdef RBM_PROFILER_ENTER_TRASH return RBM_PROFILER_ENTER_TRASH; #else - unreached(); + NYI("Model kill set for CORINFO_HELP_PROF_FCN_ENTER on target arch"); #endif + case CORINFO_HELP_PROF_FCN_LEAVE: - case CORINFO_HELP_PROF_FCN_TAILCALL: -#ifdef _TARGET_AMD64_ +#ifdef RBM_PROFILER_LEAVE_TRASH return RBM_PROFILER_LEAVE_TRASH; #else - unreached(); + NYI("Model kill set for CORINFO_HELP_PROF_FCN_LEAVE on target arch"); +#endif + + case CORINFO_HELP_PROF_FCN_TAILCALL: +#ifdef RBM_PROFILER_TAILCALL_TRASH + return RBM_PROFILER_TAILCALL_TRASH; +#else + NYI("Model kill set for CORINFO_HELP_PROF_FCN_TAILCALL on target arch"); #endif case CORINFO_HELP_STOP_FOR_GC: @@ -685,26 +696,34 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper) { assert(emitter::emitNoGChelper(helper)); -#ifdef _TARGET_AMD64_ + switch (helper) { +#if defined(_TARGET_AMD64_) || defined(_TARGET_X86_) case CORINFO_HELP_PROF_FCN_ENTER: return RBM_PROFILER_ENTER_TRASH; case CORINFO_HELP_PROF_FCN_LEAVE: - case CORINFO_HELP_PROF_FCN_TAILCALL: return RBM_PROFILER_LEAVE_TRASH; + case CORINFO_HELP_PROF_FCN_TAILCALL: + return RBM_PROFILER_TAILCALL_TRASH; +#endif // defined(_TARGET_AMD64_) || defined(_TARGET_X86_) + case CORINFO_HELP_ASSIGN_BYREF: +#if defined(_TARGET_AMD64_) // this helper doesn't trash RSI and RDI return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI); +#elif defined(_TARGET_X86_) + // This helper only trashes ECX. + return RBM_ECX; +#else + return RBM_CALLEE_TRASH_NOGC; +#endif // defined(_TARGET_AMD64_) default: return RBM_CALLEE_TRASH_NOGC; } -#else - return RBM_CALLEE_TRASH_NOGC; -#endif } // Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e., @@ -1004,9 +1023,7 @@ void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars) #endif // LEGACY_BACKEND -#ifdef DEBUGGING_SUPPORT codeGen->siUpdate(); -#endif } } @@ -1172,9 +1189,7 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree) #endif // !LEGACY_BACKEND } -#ifdef DEBUGGING_SUPPORT codeGen->siUpdate(); -#endif } // Need an explicit instantiation. 
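The compNoGCHelperCallKillSet changes in the hunk above model, per target, which registers a no-GC helper such as CORINFO_HELP_ASSIGN_BYREF may trash (only ECX on x86; the no-GC callee-trash set minus RSI/RDI on AMD64). Below is a minimal sketch of how a backend could consume that kill set when updating GC tracking after emitting such a helper call; the wrapper function is hypothetical and only the JIT-internal names it calls (compNoGCHelperCallKillSet, gcMarkRegSetNpt, regMaskTP, CorInfoHelpFunc) come from this change.

    // Hypothetical illustration, not part of this commit: after emitting a no-GC
    // helper call, stop tracking GC/byref pointers in exactly the registers the
    // helper is modeled to trash on the current target.
    void KillGCRegsForNoGCHelper(Compiler* compiler, GCInfo& gcInfo, CorInfoHelpFunc helper)
    {
        regMaskTP killMask = compiler->compNoGCHelperCallKillSet(helper); // per-target kill set
        gcInfo.gcMarkRegSetNpt(killMask); // killed regs no longer hold live GC/byref values
    }
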
@@ -1626,6 +1641,44 @@ void CodeGen::genAdjustSP(ssize_t delta) inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE); } +//------------------------------------------------------------------------ +// genAdjustStackLevel: Adjust the stack level, if required, for a throw helper block +// +// Arguments: +// block - The BasicBlock for which we are about to generate code. +// +// Assumptions: +// Must be called just prior to generating code for 'block'. +// +// Notes: +// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer, +// and if 'block' is a throw helper block with a non-zero stack level. + +void CodeGen::genAdjustStackLevel(BasicBlock* block) +{ +#if !FEATURE_FIXED_OUT_ARGS + // Check for inserted throw blocks and adjust genStackLevel. + + if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) + { + noway_assert(block->bbFlags & BBF_JMP_TARGET); + + genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int); + + if (genStackLevel != 0) + { +#ifdef _TARGET_X86_ + getEmitter()->emitMarkStackLvl(genStackLevel); + inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE); + genStackLevel = 0; +#else // _TARGET_X86_ + NYI("Need emitMarkStackLvl()"); +#endif // _TARGET_X86_ + } + } +#endif // !FEATURE_FIXED_OUT_ARGS +} + #ifdef _TARGET_ARM_ // return size // alignmentWB is out param @@ -2539,14 +2592,12 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind void CodeGen::genExitCode(BasicBlock* block) { -#ifdef DEBUGGING_SUPPORT /* Just wrote the first instruction of the epilog - inform debugger Note that this may result in a duplicate IPmapping entry, and that this is ok */ // For non-optimized debuggable code, there is only one epilog. genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true); -#endif // DEBUGGING_SUPPORT bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0); if (compiler->getNeedsGSSecurityCookie()) @@ -2968,7 +3019,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) #if defined(DEBUG) , (compiler->compCodeOpt() != Compiler::SMALL_CODE) && - !(compiler->opts.eeFlags & CORJIT_FLG_PREJIT) + !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) #endif #ifdef LEGACY_BACKEND , @@ -3095,7 +3146,8 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) We need to relax the assert as our estimation won't include code-gen stack changes (which we know don't affect fgAddCodeRef()) */ noway_assert(getEmitter()->emitMaxStackDepth <= - (compiler->fgPtrArgCntMax + compiler->compHndBBtabCount + // Return address for locally-called finallys + (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. + compiler->compHndBBtabCount + // Return address for locally-called finallys genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc (compiler->compTailCallUsed ? 
4 : 0))); // CORINFO_HELP_TAILCALL args #endif @@ -3116,8 +3168,6 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) compiler->unwindEmit(*codePtr, coldCodePtr); -#ifdef DEBUGGING_SUPPORT - /* Finalize the line # tracking logic after we know the exact block sizes/offsets */ genIPmappingGen(); @@ -3126,8 +3176,6 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) genSetScopeInfo(); -#endif // DEBUGGING_SUPPORT - #ifdef LATE_DISASM unsigned finalHotCodeSize; unsigned finalColdCodeSize; @@ -3272,6 +3320,8 @@ void CodeGen::genReportEH() EHblkDsc* HBtab; EHblkDsc* HBtabEnd; + bool isCoreRTABI = compiler->IsTargetAbi(CORINFO_CORERT_ABI); + unsigned EHCount = compiler->compHndBBtabCount; #if FEATURE_EH_FUNCLETS @@ -3279,46 +3329,55 @@ void CodeGen::genReportEH() // VM. unsigned duplicateClauseCount = 0; unsigned enclosingTryIndex; - for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++) + + // Duplicate clauses are not used by CoreRT ABI + if (!isCoreRTABI) { - for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, - // ignoring 'mutual protect' trys - enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; - enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) + for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++) { - ++duplicateClauseCount; + for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index, + // ignoring 'mutual protect' trys + enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX; + enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex)) + { + ++duplicateClauseCount; + } } + EHCount += duplicateClauseCount; } - EHCount += duplicateClauseCount; #if FEATURE_EH_CALLFINALLY_THUNKS unsigned clonedFinallyCount = 0; - // We don't keep track of how many cloned finally there are. So, go through and count. - // We do a quick pass first through the EH table to see if there are any try/finally - // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY. - - bool anyFinallys = false; - for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount; - HBtab < HBtabEnd; HBtab++) + // Duplicate clauses are not used by CoreRT ABI + if (!isCoreRTABI) { - if (HBtab->HasFinallyHandler()) + // We don't keep track of how many cloned finally there are. So, go through and count. + // We do a quick pass first through the EH table to see if there are any try/finally + // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY. 
+ + bool anyFinallys = false; + for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount; + HBtab < HBtabEnd; HBtab++) { - anyFinallys = true; - break; + if (HBtab->HasFinallyHandler()) + { + anyFinallys = true; + break; + } } - } - if (anyFinallys) - { - for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) + if (anyFinallys) { - if (block->bbJumpKind == BBJ_CALLFINALLY) + for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) { - ++clonedFinallyCount; + if (block->bbJumpKind == BBJ_CALLFINALLY) + { + ++clonedFinallyCount; + } } - } - EHCount += clonedFinallyCount; + EHCount += clonedFinallyCount; + } } #endif // FEATURE_EH_CALLFINALLY_THUNKS @@ -3373,6 +3432,23 @@ void CodeGen::genReportEH() CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType); + if (isCoreRTABI && (XTnum > 0)) + { + // For CoreRT, CORINFO_EH_CLAUSE_SAMETRY flag means that the current clause covers same + // try block as the previous one. The runtime cannot reliably infer this information from + // native code offsets because of different try blocks can have same offsets. Alternative + // solution to this problem would be inserting extra nops to ensure that different try + // blocks have different offsets. + if (EHblkDsc::ebdIsSameTry(HBtab, HBtab - 1)) + { + // The SAMETRY bit should only be set on catch clauses. This is ensured in IL, where only 'catch' is + // allowed to be mutually-protect. E.g., the C# "try {} catch {} catch {} finally {}" actually exists in + // IL as "try { try {} catch {} catch {} } finally {}". + assert(HBtab->HasCatchHandler()); + flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_SAMETRY); + } + } + // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of // the fields aren't accurate. @@ -3578,9 +3654,7 @@ void CodeGen::genReportEH() CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType); // Tell the VM this is an extra clause caused by moving funclets out of line. - // It seems weird this is from the CorExceptionFlag enum in corhdr.h, - // not the CORINFO_EH_CLAUSE_FLAGS enum in corinfo.h. - flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | COR_ILEXCEPTION_CLAUSE_DUPLICATED); + flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | CORINFO_EH_CLAUSE_DUPLICATE); // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of // the fields aren't really accurate. 
For example, we set "TryLength" to the offset of the @@ -3617,7 +3691,7 @@ void CodeGen::genReportEH() } // if (duplicateClauseCount > 0) #if FEATURE_EH_CALLFINALLY_THUNKS - if (anyFinallys) + if (clonedFinallyCount > 0) { unsigned reportedClonedFinallyCount = 0; for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) @@ -3647,9 +3721,9 @@ void CodeGen::genReportEH() CORINFO_EH_CLAUSE clause; clause.ClassToken = 0; // unused - clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED); - clause.TryOffset = hndBeg; - clause.TryLength = hndBeg; + clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | CORINFO_EH_CLAUSE_DUPLICATE); + clause.TryOffset = hndBeg; + clause.TryLength = hndBeg; clause.HandlerOffset = hndBeg; clause.HandlerLength = hndEnd; @@ -3671,7 +3745,7 @@ void CodeGen::genReportEH() } // for each block assert(clonedFinallyCount == reportedClonedFinallyCount); - } // if (anyFinallys) + } // if (clonedFinallyCount > 0) #endif // FEATURE_EH_CALLFINALLY_THUNKS #endif // FEATURE_EH_FUNCLETS @@ -6995,12 +7069,12 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) || compiler->info.compInitMem || compiler->opts.compDbgCode); -#ifdef _TARGET_64BIT_ +#ifndef LEGACY_BACKEND if (!varDsc->lvOnFrame) { continue; } -#else // !_TARGET_64BIT_ +#else // LEGACY_BACKEND if (varDsc->lvRegister) { if (varDsc->lvOnFrame) @@ -7016,7 +7090,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, } continue; } -#endif // !_TARGET_64BIT_ +#endif // LEGACY_BACKEND if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem && (varDsc->lvExactSize >= TARGET_POINTER_SIZE)) @@ -7221,11 +7295,31 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) #ifdef PROFILING_SUPPORTED -/*----------------------------------------------------------------------------- - * - * Generate the profiling function enter callback. - */ - +//----------------------------------------------------------------------------------- +// genProfilingEnterCallback: Generate the profiling function enter callback. +// +// Arguments: +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed set to 'false' if 'initReg' is +// not zero after this call. +// +// Return Value: +// None +// +// Notes: +// The x86 profile enter helper has the following requirements (see ProfileEnterNaked in +// VM\i386\asmhelpers.asm for details): +// 1. The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileEnterHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. All registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. +// void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -7236,7 +7330,6 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) return; } -#ifndef LEGACY_BACKEND #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet. 
unsigned varNum; LclVarDsc* varDsc; @@ -7280,7 +7373,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) else { // No need to record relocations, if we are generating ELT hooks under the influence - // of complus_JitELtHookEnabled=1 + // of COMPlus_JitELTHookEnabled=1 if (compiler->opts.compJitELTHookEnabled) { genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); @@ -7346,11 +7439,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) *pInitRegZeroed = false; } -#else //!_TARGET_AMD64_ - NYI("RyuJIT: Emit Profiler Enter callback"); -#endif - -#else // LEGACY_BACKEND +#elif defined(_TARGET_X86_) || (defined(_TARGET_ARM_) && defined(LEGACY_BACKEND)) unsigned saveStackLvl2 = genStackLevel; @@ -7423,17 +7512,41 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) /* Restore the stack level */ genStackLevel = saveStackLvl2; -#endif // LEGACY_BACKEND -} -/***************************************************************************** - * - * Generates Leave profiler hook. - * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. - */ +#else // target + NYI("Emit Profiler Enter callback"); +#endif // target +} +//----------------------------------------------------------------------------------- +// genProfilingLeaveCallback: Generate the profiling function leave or tailcall callback. +// Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node. +// +// Arguments: +// helper - which helper to call. Either CORINFO_HELP_PROF_FCN_LEAVE or CORINFO_HELP_PROF_FCN_TAILCALL +// +// Return Value: +// None +// +// Notes: +// The x86 profile leave/tailcall helper has the following requirements (see ProfileLeaveNaked and +// ProfileTailcallNaked in VM\i386\asmhelpers.asm for details): +// 1. The calling sequence for calling the helper is: +// push FunctionIDOrClientID +// call ProfileLeaveHelper or ProfileTailcallHelper +// 2. The calling function has an EBP frame. +// 3. EBP points to the saved ESP which is the first thing saved in the function. Thus, +// the following prolog is assumed: +// push ESP +// mov EBP, ESP +// 4. helper == CORINFO_HELP_PROF_FCN_LEAVE: All registers are preserved. +// helper == CORINFO_HELP_PROF_FCN_TAILCALL: Only argument registers are preserved. +// 5. The helper pops the FunctionIDOrClientID argument from the stack. +// void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/) { + assert((helper == CORINFO_HELP_PROF_FCN_LEAVE) || (helper == CORINFO_HELP_PROF_FCN_TAILCALL)); + // Only hook if profiler says it's okay. if (!compiler->compIsProfilerHookNeeded()) { @@ -7442,12 +7555,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC compiler->info.compProfilerCallback = true; - // Need to save on to the stack level, since the callee will pop the argument + // Need to save on to the stack level, since the helper call will pop the argument unsigned saveStackLvl2 = genStackLevel; -#ifndef LEGACY_BACKEND - #if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet. + // Since the method needs to make a profiler callback, it should have out-going arg space allocated. 
noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM); noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)); @@ -7477,7 +7589,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC else { // Don't record relocations, if we are generating ELT hooks under the influence - // of complus_JitELtHookEnabled=1 + // of COMPlus_JitELTHookEnabled=1 if (compiler->opts.compJitELTHookEnabled) { genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL); @@ -7517,13 +7629,8 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC // "mov r8, helper addr; call r8" genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2); -#else //!_TARGET_AMD64_ - NYI("RyuJIT: Emit Profiler Leave callback"); -#endif // _TARGET_* - -#else // LEGACY_BACKEND +#elif defined(_TARGET_X86_) -#if defined(_TARGET_X86_) // // Push the profilerHandle // @@ -7538,7 +7645,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC } genSinglePush(); - genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE, + genEmitHelperCall(helper, sizeof(int) * 1, // argSize EA_UNKNOWN); // retSize @@ -7549,7 +7656,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC { compiler->fgPtrArgCntMax = 1; } -#elif defined(_TARGET_ARM_) + +#elif defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + // // Push the profilerHandle // @@ -7571,9 +7680,9 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC bool r0Trashed; emitAttr attr = EA_UNKNOWN; - if (compiler->info.compRetType == TYP_VOID || - (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && (varTypeIsFloating(compiler->info.compRetType) || - compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass)))) + if (compiler->info.compRetType == TYP_VOID || (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP && + (varTypeIsFloating(compiler->info.compRetType) || + compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass)))) { r0Trashed = false; } @@ -7625,11 +7734,10 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC } regSet.rsUnlockReg(RBM_PROFILER_RET_USED); -#else // _TARGET_* - NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking them"); -#endif // _TARGET_* -#endif // LEGACY_BACKEND +#else // target + NYI("Emit Profiler Leave callback"); +#endif // target /* Restore the stack level */ genStackLevel = saveStackLvl2; @@ -7741,7 +7849,7 @@ void CodeGen::genPrologPadForReJit() assert(compiler->compGeneratingProlog); #ifdef _TARGET_XARCH_ - if (!(compiler->opts.eeFlags & CORJIT_FLG_PROF_REJIT_NOPS)) + if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_REJIT_NOPS)) { return; } @@ -8165,11 +8273,9 @@ void CodeGen::genFnProlog() getEmitter()->emitBegProlog(); compiler->unwindBegProlog(); -#ifdef DEBUGGING_SUPPORT // Do this so we can put the prolog instruction group ahead of // other instruction groups genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG); -#endif // DEBUGGING_SUPPORT #ifdef DEBUG if (compiler->opts.dspCode) @@ -8178,13 +8284,11 @@ void CodeGen::genFnProlog() } #endif -#ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) { // Create new scopes for the method-parameters for the prolog-block. 
psiBegProlog(); } -#endif #ifdef DEBUG @@ -8664,12 +8768,6 @@ void CodeGen::genFnProlog() // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem) { - /* - // size/speed option? - getEmitter()->emitIns_I_ARR(INS_mov, EA_PTRSIZE, 0, - REG_EBP, REG_NA, -compiler->lvaShadowSPfirstOffs); - */ - // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)); @@ -8707,9 +8805,8 @@ void CodeGen::genFnProlog() // Initialize any "hidden" slots/locals - if (compiler->compLocallocUsed) + if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) { - noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); #ifdef _TARGET_ARM64_ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0); #else @@ -8870,12 +8967,10 @@ void CodeGen::genFnProlog() getEmitter()->emitMarkPrologEnd(); } -#ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) { psiEndProlog(); } -#endif if (hasGCRef) { @@ -8927,7 +9022,7 @@ void CodeGen::genFnProlog() // LEA EAX, &<VARARGS HANDLE> + EAX getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset); - if (varDsc->lvRegister) + if (varDsc->lvIsInReg()) { if (varDsc->lvRegNum != REG_EAX) { @@ -9637,7 +9732,7 @@ void CodeGen::genFnEpilog(BasicBlock* block) * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset * | | // in function and funclet * |-----------------------| - * | PSP slot | + * | PSP slot | // Omitted in CoreRT ABI * |-----------------------| * ~ possible 4 byte pad ~ * ~ for alignment ~ @@ -9936,7 +10031,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() * ~ possible 8 byte pad ~ * ~ for alignment ~ * |-----------------------| - * | PSP slot | + * | PSP slot | // Omitted in CoreRT ABI * |-----------------------| * | Outgoing arg space | // this only exists if the function makes a call * |-----------------------| <---- Initial SP @@ -10007,6 +10102,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); + // If there is no PSPSym (CoreRT ABI), we are done. + if (compiler->lvaPSPSym == BAD_VAR_NUM) + { + return; + } + getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset); regTracker.rsTrackRegTrash(REG_FPBASE); @@ -10100,10 +10201,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES; unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0; + unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 
REGSIZE_BYTES : 0; + totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs + calleeFPRegsSavedSize // pushed callee-saved float regs // below calculated 'pad' will go here - + REGSIZE_BYTES // PSPSym + + PSPSymSize // PSPSym + compiler->lvaOutgoingArgSpaceSize // outgoing arg space ; @@ -10111,7 +10214,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary + calleeFPRegsSavedSize // Callee saved xmm regs - + pad + REGSIZE_BYTES // PSPSym + + pad + PSPSymSize // PSPSym + compiler->lvaOutgoingArgSpaceSize // outgoing arg space ; @@ -10124,12 +10227,14 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset); } -#endif // DEBUG - assert(compiler->lvaPSPSym != BAD_VAR_NUM); - assert(genFuncletInfo.fiPSP_slot_InitialSP_offset == - compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! + if (compiler->lvaPSPSym != BAD_VAR_NUM) + { + assert(genFuncletInfo.fiPSP_slot_InitialSP_offset == + compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and + // funclet! + } +#endif // DEBUG } #elif defined(_TARGET_ARM64_) @@ -10249,13 +10354,12 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); - if (!compiler->ehNeedsPSPSym()) + if (compiler->lvaPSPSym == BAD_VAR_NUM) { return; } - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - assert(compiler->lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable + noway_assert(isFramePointerUsed()); // We need an explicit frame pointer #if defined(_TARGET_ARM_) @@ -10851,8 +10955,162 @@ unsigned CodeGen::getFirstArgWithStackSlot() #endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_) -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT +//------------------------------------------------------------------------ +// genSinglePush: Report a change in stack level caused by a single word-sized push instruction +// +void CodeGen::genSinglePush() +{ + genStackLevel += sizeof(void*); +} + +//------------------------------------------------------------------------ +// genSinglePop: Report a change in stack level caused by a single word-sized pop instruction +// +void CodeGen::genSinglePop() +{ + genStackLevel -= sizeof(void*); +} + +//------------------------------------------------------------------------ +// genPushRegs: Push the given registers. +// +// Arguments: +// regs - mask or registers to push +// byrefRegs - OUT arg. Set to byref registers that were pushed. +// noRefRegs - OUT arg. Set to non-GC ref registers that were pushed. +// +// Return Value: +// Mask of registers pushed. +// +// Notes: +// This function does not check if the register is marked as used, etc. 
+// +regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) +{ + *byrefRegs = RBM_NONE; + *noRefRegs = RBM_NONE; + + if (regs == RBM_NONE) + { + return RBM_NONE; + } + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPushRegs with real regs!"); + return RBM_NONE; + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); + + regMaskTP pushedRegs = regs; + + for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & gcInfo.gcRegGCrefSetCur) + { + type = TYP_REF; + } + else if (regBit & gcInfo.gcRegByrefSetCur) + { + *byrefRegs |= regBit; + type = TYP_BYREF; + } + else if (noRefRegs != NULL) + { + *noRefRegs |= regBit; + type = TYP_I_IMPL; + } + else + { + continue; + } + + inst_RV(INS_push, reg, type); + + genSinglePush(); + gcInfo.gcMarkRegSetNpt(regBit); + + regs &= ~regBit; + } + + return pushedRegs; + +#endif // FEATURE_FIXED_OUT_ARGS +} + +//------------------------------------------------------------------------ +// genPopRegs: Pop the registers that were pushed by genPushRegs(). +// +// Arguments: +// regs - mask of registers to pop +// byrefRegs - The byref registers that were pushed by genPushRegs(). +// noRefRegs - The non-GC ref registers that were pushed by genPushRegs(). +// +// Return Value: +// None +// +void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) +{ + if (regs == RBM_NONE) + { + return; + } + +#if FEATURE_FIXED_OUT_ARGS + + NYI("Don't call genPopRegs with real regs!"); + +#else // FEATURE_FIXED_OUT_ARGS + + noway_assert((regs & byrefRegs) == byrefRegs); + noway_assert((regs & noRefRegs) == noRefRegs); + noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE); + + noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); + noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); + + // Walk the registers in the reverse order as genPushRegs() + for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) + { + regMaskTP regBit = regMaskTP(1) << reg; + + if ((regBit & regs) == RBM_NONE) + continue; + + var_types type; + if (regBit & byrefRegs) + { + type = TYP_BYREF; + } + else if (regBit & noRefRegs) + { + type = TYP_INT; + } + else + { + type = TYP_REF; + } + + inst_RV(INS_pop, reg, type); + genSinglePop(); + + if (type != TYP_INT) + gcInfo.gcMarkRegPtrVal(reg, type); + + regs &= ~regBit; + } + +#endif // FEATURE_FIXED_OUT_ARGS +} /***************************************************************************** * genSetScopeInfo @@ -11151,6 +11409,103 @@ void CodeGen::genSetScopeInfo() compiler->eeSetLVdone(); } +//------------------------------------------------------------------------ +// genSetScopeInfo: Record scope information for debug info +// +// Arguments: +// which +// startOffs - the starting offset for this scope +// length - the length of this scope +// varNum - the lclVar for this scope info +// LVnum +// avail +// varLoc +// +// Notes: +// Called for every scope info piece to record by the main genSetScopeInfo() + +void CodeGen::genSetScopeInfo(unsigned which, + UNATIVE_OFFSET startOffs, + UNATIVE_OFFSET length, + unsigned varNum, + unsigned LVnum, + bool avail, + Compiler::siVarLoc& varLoc) +{ + // We need to do some mapping while reporting back these variables. 
+ + unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); + noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); + +#ifdef _TARGET_X86_ + // Non-x86 platforms are allowed to access all arguments directly + // so we don't need this code. + + // Is this a varargs function? + + if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg && + varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg) + { + noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2); + + // All stack arguments (except the varargs handle) have to be + // accessed via the varargs cookie. Discard generated info, + // and just find its position relative to the varargs handle + + PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); + if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame) + { + noway_assert(!compiler->opts.compDbgCode); + return; + } + + // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for + // arguments of vararg functions to avoid reporting them to GC. + noway_assert(!compiler->lvaTable[varNum].lvRegister); + unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs; + unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs; + + noway_assert(cookieOffset < varOffset); + unsigned offset = varOffset - cookieOffset; + unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*); + noway_assert(offset < stkArgSize); + offset = stkArgSize - offset; + + varLoc.vlType = Compiler::VLT_FIXED_VA; + varLoc.vlFixedVarArg.vlfvOffset = offset; + } + +#endif // _TARGET_X86_ + + VarName name = nullptr; + +#ifdef DEBUG + + for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) + { + if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) + { + name = compiler->info.compVarScopes[scopeNum].vsdName; + } + } + + // Hang on to this compiler->info. + + TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; + + tlvi.tlviVarNum = ilVarNum; + tlvi.tlviLVnum = LVnum; + tlvi.tlviName = name; + tlvi.tlviStartPC = startOffs; + tlvi.tlviLength = length; + tlvi.tlviAvailable = avail; + tlvi.tlviVarLoc = varLoc; + +#endif // DEBUG + + compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); +} + /*****************************************************************************/ #ifdef LATE_DISASM #if defined(DEBUG) @@ -11747,19 +12102,16 @@ void CodeGen::genIPmappingGen() compiler->eeSetLIdone(); } -#endif // DEBUGGING_SUPPORT - /*============================================================================ * * These are empty stubs to help the late dis-assembler to compile - * if DEBUGGING_SUPPORT is not enabled, or the late disassembler is being - * built into a non-DEBUG build. + * if the late disassembler is being built into a non-DEBUG build. 
* *============================================================================ */ #if defined(LATE_DISASM) -#if !defined(DEBUGGING_SUPPORT) || !defined(DEBUG) +#if !defined(DEBUG) /* virtual */ const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg) @@ -11774,6 +12126,6 @@ const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsi } /*****************************************************************************/ -#endif // !defined(DEBUGGING_SUPPORT) || !defined(DEBUG) +#endif // !defined(DEBUG) #endif // defined(LATE_DISASM) /*****************************************************************************/ diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h index e9abbe6b3c..3950673e3a 100644 --- a/src/jit/codegeninterface.h +++ b/src/jit/codegeninterface.h @@ -253,12 +253,14 @@ public: private: bool m_cgDoubleAlign; -#else // !DOUBLE_ALIGN +#else // !DOUBLE_ALIGN + public: bool doubleAlignOrFramePointerUsed() const { return isFramePointerUsed(); } + #endif // !DOUBLE_ALIGN #ifdef DEBUG @@ -424,10 +426,8 @@ public: private: bool m_cgFullPtrRegMap; -#ifdef DEBUGGING_SUPPORT public: virtual void siUpdate() = 0; -#endif // DEBUGGING_SUPPORT #ifdef LATE_DISASM public: diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index ea40eb2aff..667b9d4af8 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -243,18 +243,6 @@ GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree) return NULL; } -// inline -void CodeGen::genSinglePush() -{ - genStackLevel += sizeof(void*); -} - -// inline -void CodeGen::genSinglePop() -{ - genStackLevel -= sizeof(void*); -} - #if FEATURE_STACK_FP_X87 // inline void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */) @@ -497,9 +485,10 @@ void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_type } } #endif - - insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; - inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags); + { + insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; + inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags); + } #ifdef _TARGET_XARCH_ UPDATE_LIVENESS: @@ -4328,8 +4317,6 @@ emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond) addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk); } - // #if defined(DEBUGGING_SUPPORT) - /* Special case: comparison of two constants */ // Needed if Importer doesn't call gtFoldExpr() @@ -4347,8 +4334,6 @@ emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond) addrReg1 = genRegMask(op1->gtRegNum); } - // #endif - /* Compare the operand against the constant */ if (op2->IsIconHandle()) @@ -7087,84 +7072,87 @@ void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regTracker.rsTrackRegTrash(reg); - bool op2Released = false; + { + bool op2Released = false; - // For overflow instructions, tree->gtType is the accurate type, - // and gives us the size for the operands. + // For overflow instructions, tree->gtType is the accurate type, + // and gives us the size for the operands. 
- emitAttr opSize = emitTypeSize(treeType); + emitAttr opSize = emitTypeSize(treeType); - /* Compute the new value */ + /* Compute the new value */ - if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST) + if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST) #if !CPU_HAS_FP_SUPPORT - && (treeType == TYP_INT || treeType == TYP_I_IMPL) + && (treeType == TYP_INT || treeType == TYP_I_IMPL) #endif - ) - { - ssize_t ival = op2->gtIntCon.gtIconVal; - - if (oper == GT_ADD) - { - genIncRegBy(reg, ival, tree, treeType, ovfl); - } - else if (oper == GT_SUB) - { - if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || - (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. - // Therefore we can't use -ival. ) - { - /* For unsigned overflow, we have to use INS_sub to set - the flags correctly */ + { + ssize_t ival = op2->gtIntCon.gtIconVal; - genDecRegBy(reg, ival, tree); + if (oper == GT_ADD) + { + genIncRegBy(reg, ival, tree, treeType, ovfl); } - else + else if (oper == GT_SUB) { - /* Else, we simply add the negative of the value */ + if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) || + (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000. + // Therefore we can't use -ival. + ) + { + /* For unsigned overflow, we have to use INS_sub to set + the flags correctly */ - genIncRegBy(reg, -ival, tree, treeType, ovfl); + genDecRegBy(reg, ival, tree); + } + else + { + /* Else, we simply add the negative of the value */ + + genIncRegBy(reg, -ival, tree, treeType, ovfl); + } + } + else if (oper == GT_MUL) + { + genMulRegBy(reg, ival, tree, treeType, ovfl); } } - else if (oper == GT_MUL) - { - genMulRegBy(reg, ival, tree, treeType, ovfl); - } - } - else - { - // op2 could be a GT_COMMA (i.e. an assignment for a CSE def) - op2 = op2->gtEffectiveVal(); - if (varTypeIsByte(treeType) && op2->InReg()) + else { - noway_assert(genRegMask(reg) & RBM_BYTE_REGS); + // op2 could be a GT_COMMA (i.e. 
an assignment for a CSE def) + op2 = op2->gtEffectiveVal(); + if (varTypeIsByte(treeType) && op2->InReg()) + { + noway_assert(genRegMask(reg) & RBM_BYTE_REGS); - regNumber op2reg = op2->gtRegNum; - regMaskTP op2regMask = genRegMask(op2reg); + regNumber op2reg = op2->gtRegNum; + regMaskTP op2regMask = genRegMask(op2reg); - if (!(op2regMask & RBM_BYTE_REGS)) - { - regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); + if (!(op2regMask & RBM_BYTE_REGS)) + { + regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS); - inst_RV_RV(INS_mov, byteReg, op2reg); - regTracker.rsTrackRegTrash(byteReg); + inst_RV_RV(INS_mov, byteReg, op2reg); + regTracker.rsTrackRegTrash(byteReg); - genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); - op2Released = true; + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + op2Released = true; - op2->gtRegNum = byteReg; + op2->gtRegNum = byteReg; + } } - } - inst_RV_TT(ins, reg, op2, 0, opSize, flags); - } + inst_RV_TT(ins, reg, op2, 0, opSize, flags); + } - /* Free up anything that was tied up by the operand */ - - if (!op2Released) - genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + /* Free up anything that was tied up by the operand */ + if (!op2Released) + { + genDoneAddressable(op2, addrReg, RegSet::KEEP_REG); + } + } /* The result will be where the first operand is sitting */ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */ @@ -9721,7 +9709,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP switch (oper) { case GT_ASG: - if (tree->OperIsBlkOp()) + if (tree->OperIsBlkOp() && op1->gtOper != GT_LCL_VAR) { genCodeForBlkOp(tree, destReg); } @@ -10184,6 +10172,9 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP if (op1 == NULL) return; #endif + __fallthrough; + + case GT_INIT_VAL: /* Generate the operand into some register */ @@ -11293,10 +11284,8 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) bool volat = false; // Is this a volatile store regMaskTP regGC; instruction ins; -#ifdef DEBUGGING_SUPPORT - unsigned lclVarNum = compiler->lvaCount; - unsigned lclILoffs = DUMMY_INIT(0); -#endif + unsigned lclVarNum = compiler->lvaCount; + unsigned lclILoffs = DUMMY_INIT(0); #ifdef _TARGET_ARM_ if (tree->gtType == TYP_STRUCT) @@ -11335,7 +11324,6 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; -#ifdef DEBUGGING_SUPPORT /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. * Remember the local var info to call siCheckVarScope @@ -11346,7 +11334,6 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) lclVarNum = varNum; lclILoffs = op1->gtLclVar.gtLclILoffs; } -#endif /* Check against dead store ? (with min opts we may have dead stores) */ @@ -11999,13 +11986,11 @@ void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree) genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl); LExit: -#ifdef DEBUGGING_SUPPORT /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. 
*/ if (lclVarNum < compiler->lvaCount) siCheckVarScope(lclVarNum, lclILoffs); -#endif } #ifdef _PREFAST_ #pragma warning(pop) @@ -12436,14 +12421,12 @@ void CodeGen::genCodeForBBlist() regSet.rsSpillBeg(); -#ifdef DEBUGGING_SUPPORT /* Initialize the line# tracking logic */ if (compiler->opts.compScopeInfo) { siInit(); } -#endif #ifdef _TARGET_X86_ if (compiler->compTailCallUsed) @@ -12774,27 +12757,7 @@ void CodeGen::genCodeForBBlist() genResetFPstkLevel(); #endif // FEATURE_STACK_FP_X87 -#if !FEATURE_FIXED_OUT_ARGS - /* Check for inserted throw blocks and adjust genStackLevel */ - - if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block)) - { - noway_assert(block->bbFlags & BBF_JMP_TARGET); - - genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int); - - if (genStackLevel) - { -#ifdef _TARGET_X86_ - getEmitter()->emitMarkStackLvl(genStackLevel); - inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE); - genStackLevel = 0; -#else // _TARGET_X86_ - NYI("Need emitMarkStackLvl()"); -#endif // _TARGET_X86_ - } - } -#endif // !FEATURE_FIXED_OUT_ARGS + genAdjustStackLevel(block); savedStkLvl = genStackLevel; @@ -12802,7 +12765,6 @@ void CodeGen::genCodeForBBlist() compiler->compCurBB = block; -#ifdef DEBUGGING_SUPPORT siBeginBlock(block); // BBF_INTERNAL blocks don't correspond to any single IL instruction. @@ -12810,7 +12772,6 @@ void CodeGen::genCodeForBBlist() genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); bool firstMapping = true; -#endif // DEBUGGING_SUPPORT /*--------------------------------------------------------------------- * @@ -12830,8 +12791,6 @@ void CodeGen::genCodeForBBlist() { noway_assert(stmt->gtOper == GT_STMT); -#if defined(DEBUGGING_SUPPORT) - /* Do we have a new IL-offset ? */ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) @@ -12841,8 +12800,6 @@ void CodeGen::genCodeForBBlist() firstMapping = false; } -#endif // DEBUGGING_SUPPORT - #ifdef DEBUG if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) { @@ -12945,7 +12902,7 @@ void CodeGen::genCodeForBBlist() // harmless "inc" instruction (does not interfere with the exception // object). - if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) && + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && (stmt == block->bbTreeList) && (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp))) { nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT; @@ -12972,14 +12929,10 @@ void CodeGen::genCodeForBBlist() noway_assert(stmt->gtOper == GT_STMT); -#ifdef DEBUGGING_SUPPORT genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx); -#endif } //-------- END-FOR each statement-tree of the current block --------- -#ifdef DEBUGGING_SUPPORT - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) { siEndBlock(block); @@ -13005,8 +12958,6 @@ void CodeGen::genCodeForBBlist() } } -#endif // DEBUGGING_SUPPORT - genStackLevel -= savedStkLvl; gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs); @@ -13449,10 +13400,8 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av { case GT_ASG: { -#ifdef DEBUGGING_SUPPORT unsigned lclVarNum = compiler->lvaCount; unsigned lclVarILoffs = DUMMY_INIT(0); -#endif /* Is the target a local ? 
*/ @@ -13467,7 +13416,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av // No dead stores, (with min opts we may have dead stores) noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH)); -#ifdef DEBUGGING_SUPPORT /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. * Remember the local var info to call siCheckVarScope @@ -13479,7 +13427,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av lclVarNum = varNum; lclVarILoffs = op1->gtLclVar.gtLclILoffs; } -#endif /* Has the variable been assigned to a register (pair) ? */ @@ -13767,13 +13714,11 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av genUpdateLife(op1); genUpdateLife(tree); -#ifdef DEBUGGING_SUPPORT /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. */ if (lclVarNum < compiler->lvaCount) siCheckVarScope(lclVarNum, lclVarILoffs); -#endif } return; @@ -15792,132 +15737,6 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize) /***************************************************************************** * - * Push the given registers. - * This function does not check if the register is marked as used, etc. - */ - -regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs) -{ - *byrefRegs = RBM_NONE; - *noRefRegs = RBM_NONE; - - // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this - - if (regs == RBM_NONE) - return RBM_NONE; - -#if FEATURE_FIXED_OUT_ARGS - - NYI("Don't call genPushRegs with real regs!"); - return RBM_NONE; - -#else // FEATURE_FIXED_OUT_ARGS - - noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL)); - noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL)); - - regMaskTP pushedRegs = regs; - - for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) - { - regMaskTP regBit = regMaskTP(1) << reg; - - if ((regBit & regs) == RBM_NONE) - continue; - - var_types type; - if (regBit & gcInfo.gcRegGCrefSetCur) - { - type = TYP_REF; - } - else if (regBit & gcInfo.gcRegByrefSetCur) - { - *byrefRegs |= regBit; - type = TYP_BYREF; - } - else if (noRefRegs != NULL) - { - *noRefRegs |= regBit; - type = TYP_I_IMPL; - } - else - { - continue; - } - - inst_RV(INS_push, reg, type); - - genSinglePush(); - gcInfo.gcMarkRegSetNpt(regBit); - - regs &= ~regBit; - } - - return pushedRegs; - -#endif // FEATURE_FIXED_OUT_ARGS -} - -/***************************************************************************** - * - * Pop the registers pushed by genPushRegs() - */ - -void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs) -{ - if (regs == RBM_NONE) - return; - -#if FEATURE_FIXED_OUT_ARGS - - NYI("Don't call genPopRegs with real regs!"); - -#else // FEATURE_FIXED_OUT_ARGS - - noway_assert((regs & byrefRegs) == byrefRegs); - noway_assert((regs & noRefRegs) == noRefRegs); - // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. 
Caller is responsible for all this - noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE); - - noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); - noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); - - // Walk the registers in the reverse order as genPushRegs() - for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) - { - regMaskTP regBit = regMaskTP(1) << reg; - - if ((regBit & regs) == RBM_NONE) - continue; - - var_types type; - if (regBit & byrefRegs) - { - type = TYP_BYREF; - } - else if (regBit & noRefRegs) - { - type = TYP_INT; - } - else - { - type = TYP_REF; - } - - inst_RV(INS_pop, reg, type); - genSinglePop(); - - if (type != TYP_INT) - gcInfo.gcMarkRegPtrVal(reg, type); - - regs &= ~regBit; - } - -#endif // FEATURE_FIXED_OUT_ARGS -} - -/***************************************************************************** - * * Push the given argument list, right to left; returns the total amount of * stuff pushed. */ @@ -18519,12 +18338,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) CORINFO_SIG_INFO* sigInfo = nullptr; -#ifdef DEBUGGING_SUPPORT if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL) { (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); } -#endif /* Make some sanity checks on the call node */ @@ -19600,6 +19417,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) regNumber indCallReg; case IAT_VALUE: + { //------------------------------------------------------ // Non-virtual direct calls to known addressess // @@ -19607,7 +19425,24 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) // it be nice if they all did! CLANG_FORMAT_COMMENT_ANCHOR; #ifdef _TARGET_ARM_ - if (!arm_Valid_Imm_For_BL((ssize_t)addr)) + // We may use direct call for some of recursive calls + // as we can safely estimate the distance from the call site to the top of the method + const int codeOffset = MAX_PROLOG_SIZE_BYTES + // prolog size + getEmitter()->emitCurCodeOffset + // offset of the current IG + getEmitter()->emitCurIGsize + // size of the current IG + 4; // size of the jump instruction + // that we are now emitting + if (compiler->gtIsRecursiveCall(call->AsCall()) && codeOffset <= -CALL_DIST_MAX_NEG) + { + getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr + args, retSize, gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, + REG_NA, REG_NA, 0, 0, // ireg, xreg, xmul, disp + false, // isJump + emitter::emitNoGChelper(helperNum)); + } + else if (!arm_Valid_Imm_For_BL((ssize_t)addr)) { // Load the address into a register and call through a register indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the @@ -19634,7 +19469,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) false, /* isJump */ emitter::emitNoGChelper(helperNum)); } - break; + } + break; case IAT_PVALUE: //------------------------------------------------------ @@ -20046,7 +19882,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed) #if defined(_TARGET_X86_) if (call->gtFlags & GTF_CALL_UNMANAGED) { - if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) || + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PINVOKE_RESTORE_ESP) || compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50)) { // P/Invoke signature mismatch resilience - restore ESP to pre-call value. 
We would ideally @@ -20756,9 +20592,11 @@ DONE: } #endif - /* Write the lvaShadowSPfirst stack frame slot */ - noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); - getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + /* Write the lvaLocAllocSPvar stack frame slot */ + if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + } #if STACK_PROBES // Don't think it is worth it the codegen complexity to embed this @@ -20783,98 +20621,6 @@ DONE: return regCnt; } -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/***************************************************************************** - * genSetScopeInfo - * - * Called for every scope info piece to record by the main genSetScopeInfo() - */ - -void CodeGen::genSetScopeInfo(unsigned which, - UNATIVE_OFFSET startOffs, - UNATIVE_OFFSET length, - unsigned varNum, - unsigned LVnum, - bool avail, - Compiler::siVarLoc& varLoc) -{ - /* We need to do some mapping while reporting back these variables */ - - unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); - noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); - -#ifdef _TARGET_X86_ - // Non-x86 platforms are allowed to access all arguments directly - // so we don't need this code. - - // Is this a varargs function? - - if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg && - varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg) - { - noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2); - - // All stack arguments (except the varargs handle) have to be - // accessed via the varargs cookie. Discard generated info, - // and just find its position relative to the varargs handle - - PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount); - if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame) - { - noway_assert(!compiler->opts.compDbgCode); - return; - } - - // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for - // arguments of vararg functions to avoid reporting them to GC. - noway_assert(!compiler->lvaTable[varNum].lvRegister); - unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs; - unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs; - - noway_assert(cookieOffset < varOffset); - unsigned offset = varOffset - cookieOffset; - unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*); - noway_assert(offset < stkArgSize); - offset = stkArgSize - offset; - - varLoc.vlType = Compiler::VLT_FIXED_VA; - varLoc.vlFixedVarArg.vlfvOffset = offset; - } - -#endif // _TARGET_X86_ - - VarName name = NULL; - -#ifdef DEBUG - - for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) - { - if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) - { - name = compiler->info.compVarScopes[scopeNum].vsdName; - } - } - - // Hang on to this compiler->info. 
- - TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; - - tlvi.tlviVarNum = ilVarNum; - tlvi.tlviLVnum = LVnum; - tlvi.tlviName = name; - tlvi.tlviStartPC = startOffs; - tlvi.tlviLength = length; - tlvi.tlviAvailable = avail; - tlvi.tlviVarLoc = varLoc; - -#endif // DEBUG - - compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); -} - -#endif // DEBUGGING_SUPPORT - /***************************************************************************** * * Return non-zero if the given register is free after the given tree is diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp new file mode 100644 index 0000000000..9713288e08 --- /dev/null +++ b/src/jit/codegenlinear.cpp @@ -0,0 +1,1773 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Code Generation Support Methods for Linear Codegen XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +#include "jitpch.h" +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator. +#include "emit.h" +#include "codegen.h" + +//------------------------------------------------------------------------ +// genCodeForBBlist: Generate code for all the blocks in a method +// +// Arguments: +// None +// +// Notes: +// This is the main method for linear codegen. It calls genCodeForTreeNode +// to generate the code for each node in each BasicBlock, and handles BasicBlock +// boundaries and branches. +// +void CodeGen::genCodeForBBlist() +{ + unsigned varNum; + LclVarDsc* varDsc; + + unsigned savedStkLvl; + +#ifdef DEBUG + genInterruptibleUsed = true; + + // You have to be careful if you create basic blocks from now on + compiler->fgSafeBasicBlockCreation = false; + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnCall) + { + compiler->opts.compStackCheckOnCall = false; + } + + // This stress mode is not comptible with fully interruptible GC + if (genInterruptible && compiler->opts.compStackCheckOnRet) + { + compiler->opts.compStackCheckOnRet = false; + } +#endif // DEBUG + + // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. + genPrepForEHCodegen(); + + assert(!compiler->fgFirstBBScratch || + compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. + + /* Initialize the spill tracking logic */ + + regSet.rsSpillBeg(); + + /* Initialize the line# tracking logic */ + + if (compiler->opts.compScopeInfo) + { + siInit(); + } + + // The current implementation of switch tables requires the first block to have a label so it + // can generate offsets to the switch label targets. + // TODO-CQ: remove this when switches have been re-implemented to not use this. 
+ if (compiler->fgHasSwitch) + { + compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; + } + + genPendingCallLabel = nullptr; + + /* Initialize the pointer tracking code */ + + gcInfo.gcRegPtrSetInit(); + gcInfo.gcVarPtrSetInit(); + + /* If any arguments live in registers, mark those regs as such */ + + for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + { + /* Is this variable a parameter assigned to a register? */ + + if (!varDsc->lvIsParam || !varDsc->lvRegister) + { + continue; + } + + /* Is the argument live on entry to the method? */ + + if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) + { + continue; + } + + /* Is this a floating-point argument? */ + + if (varDsc->IsFloatRegType()) + { + continue; + } + + noway_assert(!varTypeIsFloating(varDsc->TypeGet())); + + /* Mark the register as holding the variable */ + + regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); + } + + unsigned finallyNesting = 0; + + // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without + // allocation at the start of each basic block. + VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); + + /*------------------------------------------------------------------------- + * + * Walk the basic blocks and generate code for each one + * + */ + + BasicBlock* block; + BasicBlock* lblk; /* previous block */ + + for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n=============== Generating "); + block->dspBlockHeader(compiler, true, true); + compiler->fgDispBBLiveness(block); + } +#endif // DEBUG + + // Figure out which registers hold variables on entry to this block + + regSet.ClearMaskVars(); + gcInfo.gcRegGCrefSetCur = RBM_NONE; + gcInfo.gcRegByrefSetCur = RBM_NONE; + + compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); + + genUpdateLife(block->bbLiveIn); + + // Even if liveness didn't change, we need to update the registers containing GC references. + // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't + // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change + // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). 
+ + regMaskTP newLiveRegSet = RBM_NONE; + regMaskTP newRegGCrefSet = RBM_NONE; + regMaskTP newRegByrefSet = RBM_NONE; +#ifdef DEBUG + VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); + VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); +#endif + VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); + while (iter.NextElem(compiler, &varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + if (varDsc->lvIsInReg()) + { + newLiveRegSet |= varDsc->lvRegMask(); + if (varDsc->lvType == TYP_REF) + { + newRegGCrefSet |= varDsc->lvRegMask(); + } + else if (varDsc->lvType == TYP_BYREF) + { + newRegByrefSet |= varDsc->lvRegMask(); + } +#ifdef DEBUG + if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, removedGCVars, varIndex); + } +#endif // DEBUG + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } + else if (compiler->lvaIsGCTracked(varDsc)) + { +#ifdef DEBUG + if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, addedGCVars, varIndex); + } +#endif // DEBUG + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } + } + + regSet.rsMaskVars = newLiveRegSet; + +#ifdef DEBUG + if (compiler->verbose) + { + if (!VarSetOps::IsEmpty(compiler, addedGCVars)) + { + printf("\t\t\t\t\t\t\tAdded GCVars: "); + dumpConvertedVarSet(compiler, addedGCVars); + printf("\n"); + } + if (!VarSetOps::IsEmpty(compiler, removedGCVars)) + { + printf("\t\t\t\t\t\t\tRemoved GCVars: "); + dumpConvertedVarSet(compiler, removedGCVars); + printf("\n"); + } + } +#endif // DEBUG + + gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true)); + gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true)); + + /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to + represent the exception object (TYP_REF). + We mark REG_EXCEPTION_OBJECT as holding a GC object on entry + to the block, it will be the first thing evaluated + (thanks to GTF_ORDER_SIDEEFF). 
+ */ + + if (handlerGetsXcptnObj(block->bbCatchTyp)) + { + for (GenTree* node : LIR::AsRange(block)) + { + if (node->OperGet() == GT_CATCH_ARG) + { + gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); + break; + } + } + } + + /* Start a new code output block */ + + genUpdateCurrentFunclet(block); + +#ifdef _TARGET_XARCH_ + if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) + { + getEmitter()->emitLoopAlign(); + } +#endif + +#ifdef DEBUG + if (compiler->opts.dspCode) + { + printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); + } +#endif + + block->bbEmitCookie = nullptr; + + if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) + { + /* Mark a label and update the current set of live GC refs */ + + block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, + gcInfo.gcRegByrefSetCur, FALSE); + } + + if (block == compiler->fgFirstColdBlock) + { +#ifdef DEBUG + if (compiler->verbose) + { + printf("\nThis is the start of the cold region of the method\n"); + } +#endif + // We should never have a block that falls through into the Cold section + noway_assert(!lblk->bbFallsThrough()); + + // We require the block that starts the Cold section to have a label + noway_assert(block->bbEmitCookie); + getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); + } + + /* Both stacks are always empty on entry to a basic block */ + + genStackLevel = 0; + genAdjustStackLevel(block); + savedStkLvl = genStackLevel; + + /* Tell everyone which basic block we're working on */ + + compiler->compCurBB = block; + + siBeginBlock(block); + + // BBF_INTERNAL blocks don't correspond to any single IL instruction. + if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && + !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to + // emit a NO_MAPPING entry, immediately after the prolog. + { + genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); + } + + bool firstMapping = true; + +#if FEATURE_EH_FUNCLETS + if (block->bbFlags & BBF_FUNCLET_BEG) + { + genReserveFuncletProlog(block); + } +#endif // FEATURE_EH_FUNCLETS + + // Clear compCurStmt and compCurLifeTree. + compiler->compCurStmt = nullptr; + compiler->compCurLifeTree = nullptr; + + // Traverse the block in linear order, generating code for each node as we + // as we encounter it. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + // Set the use-order numbers for each node. + { + int useNum = 0; + for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) + { + assert((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0); + + node->gtUseNum = -1; + if (node->isContained() || node->IsCopyOrReload()) + { + continue; + } + + for (GenTree* operand : node->Operands()) + { + genNumberOperandUse(operand, useNum); + } + } + } +#endif // DEBUG + + IL_OFFSETX currentILOffset = BAD_IL_OFFSET; + for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) + { + // Do we have a new IL offset? 
+ if (node->OperGet() == GT_IL_OFFSET) + { + genEnsureCodeEmitted(currentILOffset); + currentILOffset = node->gtStmt.gtStmtILoffsx; + genIPmappingAdd(currentILOffset, firstMapping); + firstMapping = false; + } + +#ifdef DEBUG + if (node->OperGet() == GT_IL_OFFSET) + { + noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || + node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); + + if (compiler->opts.dspCode && compiler->opts.dspInstrs && + node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) + { + while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs) + { + genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); + } + } + } +#endif // DEBUG + + genCodeForTreeNode(node); + if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse) + { + genConsumeReg(node); + } + } // end for each node in block + +#ifdef DEBUG + // The following set of register spill checks and GC pointer tracking checks used to be + // performed at statement boundaries. Now, with LIR, there are no statements, so they are + // performed at the end of each block. + // TODO: could these checks be performed more frequently? E.g., at each location where + // the register allocator says there are no live non-variable registers. Perhaps this could + // be done by (a) keeping a running count of live non-variable registers by using + // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively, + // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA + // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node, + // there will be no live non-variable registers. + + regSet.rsSpillChk(); + + /* Make sure we didn't bungle pointer register tracking */ + + regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; + regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; + + // If return is a GC-type, clear it. Note that if a common + // epilog is generated (genReturnBB) it has a void return + // even though we might return a ref. We can't use the compRetType + // as the determiner because something we are tracking as a byref + // might be used as a return value of a int function (which is legal) + GenTree* blockLastNode = block->lastNode(); + if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) && + (varTypeIsGC(compiler->info.compRetType) || + (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet())))) + { + nonVarPtrRegs &= ~RBM_INTRET; + } + + if (nonVarPtrRegs) + { + printf("Regset after BB%02u gcr=", block->bbNum); + printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); + printf(", byr="); + printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); + printf(", regVars="); + printRegMaskInt(regSet.rsMaskVars); + compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); + printf("\n"); + } + + noway_assert(nonVarPtrRegs == RBM_NONE); +#endif // DEBUG + +#if defined(DEBUG) + if (block->bbNext == nullptr) + { +// Unit testing of the emitter: generate a bunch of instructions into the last block +// (it's as good as any, but better than the prolog, which can only be a single instruction +// group) then use COMPlus_JitLateDisasm=* to see if the late disassembler +// thinks the instructions are the same as we do. 
+#if defined(_TARGET_AMD64_) && defined(LATE_DISASM) + genAmd64EmitterUnitTests(); +#elif defined(_TARGET_ARM64_) + genArm64EmitterUnitTests(); +#endif // _TARGET_ARM64_ + } +#endif // defined(DEBUG) + + // It is possible to reach the end of the block without generating code for the current IL offset. + // For example, if the following IR ends the current block, no code will have been generated for + // offset 21: + // + // ( 0, 0) [000040] ------------ il_offset void IL offset: 21 + // + // N001 ( 0, 0) [000039] ------------ nop void + // + // This can lead to problems when debugging the generated code. To prevent these issues, make sure + // we've generated code for the last IL offset we saw in the block. + genEnsureCodeEmitted(currentILOffset); + + if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) + { + siEndBlock(block); + + /* Is this the last block, and are there any open scopes left ? */ + + bool isLastBlockProcessed = (block->bbNext == nullptr); + if (block->isBBCallAlwaysPair()) + { + isLastBlockProcessed = (block->bbNext->bbNext == nullptr); + } + + if (isLastBlockProcessed && siOpenScopeList.scNext) + { + /* This assert no longer holds, because we may insert a throw + block to demarcate the end of a try or finally region when they + are at the end of the method. It would be nice if we could fix + our code so that this throw block will no longer be necessary. */ + + // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); + + siCloseAllOpenScopes(); + } + } + + genStackLevel -= savedStkLvl; + +#ifdef DEBUG + // compCurLife should be equal to the liveOut set, except that we don't keep + // it up to date for vars that are not register candidates + // (it would be nice to have a xor set function) + + VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); + VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); + VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); + while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; + assert(!varDsc->lvIsRegCandidate()); + } +#endif + + /* Both stacks should always be empty on exit from a basic block */ + noway_assert(genStackLevel == 0); + +#ifdef _TARGET_AMD64_ + // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several + // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack + // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. + // The document "X64 and ARM ABIs.docx" has more details. The situations: + // 1. If the call instruction is in a different EH region as the instruction that follows it. + // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might + // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters + // here.) + // We handle case #1 here, and case #2 in the emitter. + if (getEmitter()->emitIsLastInsCall()) + { + // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? + // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. 
Technically, + // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions + // generated before the OS epilog starts, such as a GS cookie check. + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) + { + // We only need the NOP if we're not going to generate any more code as part of the block end. + + switch (block->bbJumpKind) + { + case BBJ_ALWAYS: + case BBJ_THROW: + case BBJ_CALLFINALLY: + case BBJ_EHCATCHRET: + // We're going to generate more code below anyway, so no need for the NOP. + + case BBJ_RETURN: + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + // These are the "epilog follows" case, handled in the emitter. + + break; + + case BBJ_NONE: + if (block->bbNext == nullptr) + { + // Call immediately before the end of the code; we should never get here. + instGen(INS_BREAKPOINT); // This should never get executed + } + else + { + // We need the NOP + instGen(INS_nop); + } + break; + + case BBJ_COND: + case BBJ_SWITCH: + // These can't have a call as the last instruction! + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + } + } +#endif // _TARGET_AMD64_ + + /* Do we need to generate a jump or return? */ + + switch (block->bbJumpKind) + { + case BBJ_ALWAYS: + inst_JMP(EJ_jmp, block->bbJumpDest); + break; + + case BBJ_RETURN: + genExitCode(block); + break; + + case BBJ_THROW: + // If we have a throw at the end of a function or funclet, we need to emit another instruction + // afterwards to help the OS unwinder determine the correct context during unwind. + // We insert an unexecuted breakpoint instruction in several situations + // following a throw instruction: + // 1. If the throw is the last instruction of the function or funclet. This helps + // the OS unwinder determine the correct context during an unwind from the + // thrown exception. + // 2. If this is the last block of the hot section. + // 3. If the subsequent block is a special throw block. + // 4. On AMD64, if the next block is in a different EH region.
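// Editor's note (annotation, not part of this commit): reading the condition below against the
// numbered list above, the 'block->bbNext == nullptr' and 'BBF_FUNCLET_BEG' tests correspond to
// case 1, the 'sameEHRegion' test to case 4, the 'fgIsThrowHlpBlk' test to case 3, and the
// comparison against 'fgFirstColdBlock' to case 2.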
+ if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || + !BasicBlock::sameEHRegion(block, block->bbNext) || + (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || + block->bbNext == compiler->fgFirstColdBlock) + { + instGen(INS_BREAKPOINT); // This should never get executed + } + + break; + + case BBJ_CALLFINALLY: + block = genCallFinally(block, lblk); + break; + +#if FEATURE_EH_FUNCLETS + + case BBJ_EHCATCHRET: + genEHCatchRet(block); + __fallthrough; + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + genReserveFuncletEpilog(block); + break; + +#else // !FEATURE_EH_FUNCLETS + + case BBJ_EHCATCHRET: + noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86 + + case BBJ_EHFINALLYRET: + case BBJ_EHFILTERRET: + genEHFinallyOrFilterRet(block); + break; + +#endif // !FEATURE_EH_FUNCLETS + + case BBJ_NONE: + case BBJ_COND: + case BBJ_SWITCH: + break; + + default: + noway_assert(!"Unexpected bbJumpKind"); + break; + } + +#ifdef DEBUG + compiler->compCurBB = nullptr; +#endif + + } //------------------ END-FOR each block of the method ------------------- + + /* Nothing is live at this point */ + genUpdateLife(VarSetOps::MakeEmpty(compiler)); + + /* Finalize the spill tracking logic */ + + regSet.rsSpillEnd(); + + /* Finalize the temp tracking logic */ + + compiler->tmpEnd(); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\n# "); + printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, + compiler->compSizeEstimate); + printf("%s\n", compiler->info.compFullName); + } +#endif +} + +/* +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XX XX +XX Register Management XX +XX XX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +*/ +// + +//------------------------------------------------------------------------ +// genGetAssignedReg: Get the register assigned to the given node +// +// Arguments: +// tree - the lclVar node whose assigned register we want +// +// Return Value: +// The assigned regNumber +// +regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) +{ + return tree->gtRegNum; +} + +//------------------------------------------------------------------------ +// genSpillVar: Spill a local variable +// +// Arguments: +// tree - the lclVar node for the variable being spilled +// +// Return Value: +// None. +// +// Assumptions: +// The lclVar must be a register candidate (lvRegCandidate) + +void CodeGen::genSpillVar(GenTreePtr tree) +{ + unsigned varNum = tree->gtLclVarCommon.gtLclNum; + LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); + + assert(varDsc->lvIsRegCandidate()); + + // We don't actually need to spill if it is already living in memory + bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); + if (needsSpill) + { + // In order for a lclVar to have been allocated to a register, it must not have been aliasable, and can + // therefore be store-normalized (rather than load-normalized). In fact, not performing store normalization + // can lead to problems on architectures where a lclVar may be allocated to a register that is not + // addressable at the granularity of the lclVar's defined type (e.g. x86). 
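// Editor's sketch (illustrative only, not part of this commit): "store-normalized" above means a
// small-int local is always written back at its widened size. Assuming a TYP_SHORT register
// candidate, the spill effectively computes
//
//     var_types lclTyp = genActualType(TYP_SHORT); // yields TYP_INT
//     emitAttr  size   = emitTypeSize(lclTyp);     // EA_4BYTE, so a full 4-byte store is emitted
//
// which is why the code below takes genActualType of the variable's declared type rather than the
// possibly narrower type on the tree node.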
+ var_types lclTyp = genActualType(varDsc->TypeGet()); + emitAttr size = emitTypeSize(lclTyp); + + bool restoreRegVar = false; + if (tree->gtOper == GT_REG_VAR) + { + tree->SetOper(GT_LCL_VAR); + restoreRegVar = true; + } + + // mask off the flag to generate the right spill code, then bring it back + tree->gtFlags &= ~GTF_REG_VAL; + + instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); +#if CPU_LONG_USES_REGPAIR + if (varTypeIsMultiReg(tree)) + { + assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); + assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); + regNumber regLo = genRegPairLo(tree->gtRegPair); + regNumber regHi = genRegPairHi(tree->gtRegPair); + inst_TT_RV(storeIns, tree, regLo); + inst_TT_RV(storeIns, tree, regHi, 4); + } + else +#endif + { + assert(varDsc->lvRegNum == tree->gtRegNum); + inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); + } + tree->gtFlags |= GTF_REG_VAL; + + if (restoreRegVar) + { + tree->SetOper(GT_REG_VAR); + } + + genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); + gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); + + if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) + { +#ifdef DEBUG + if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); + } + else + { + JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); + } +#endif + VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } + } + + tree->gtFlags &= ~GTF_SPILL; + varDsc->lvRegNum = REG_STK; + if (varTypeIsMultiReg(tree)) + { + varDsc->lvOtherReg = REG_STK; + } +} + +//------------------------------------------------------------------------ +// genUpdateVarReg: Update the current register location for a lclVar +// +// Arguments: +// varDsc - the LclVarDsc for the lclVar +// tree - the lclVar node +// +// inline +void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree) +{ + assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); + varDsc->lvRegNum = tree->gtRegNum; +} + +//------------------------------------------------------------------------ +// sameRegAsDst: Return the child that has the same reg as the dst (if any) +// +// Arguments: +// tree - the node of interest +// other - an out parameter to return the other child +// +// Notes: +// If 'tree' has a child with the same assigned register as its target reg, +// that child will be returned, and 'other' will contain the non-matching child. +// Otherwise, both other and the return value will be nullptr. +// +GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/) +{ + if (tree->gtRegNum == REG_NA) + { + other = nullptr; + return nullptr; + } + + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtOp.gtOp2; + if (op1->gtRegNum == tree->gtRegNum) + { + other = op2; + return op1; + } + if (op2->gtRegNum == tree->gtRegNum) + { + other = op1; + return op2; + } + else + { + other = nullptr; + return nullptr; + } +} + +//------------------------------------------------------------------------ +// genUnspillRegIfNeeded: Reload the value into a register, if needed +// +// Arguments: +// tree - the node of interest. +// +// Notes: +// In the normal case, the value will be reloaded into the register it +// was originally computed into. 
However, if that register is not available, +// the register allocator will have allocated a different register, and +// inserted a GT_RELOAD to indicate the register into which it should be +// reloaded. +// +void CodeGen::genUnspillRegIfNeeded(GenTree* tree) +{ + regNumber dstReg = tree->gtRegNum; + GenTree* unspillTree = tree; + + if (tree->gtOper == GT_RELOAD) + { + unspillTree = tree->gtOp.gtOp1; + } + + if ((unspillTree->gtFlags & GTF_SPILLED) != 0) + { + if (genIsRegCandidateLocal(unspillTree)) + { + // Reset spilled flag, since we are going to load a local variable from its home location. + unspillTree->gtFlags &= ~GTF_SPILLED; + + GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; + +// TODO-Cleanup: The following code could probably be further merged and cleaned up. +#ifdef _TARGET_XARCH_ + // Load local variable from its home location. + // In most cases the tree type will indicate the correct type to use for the load. + // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets + // widened when loaded into a register), and its size is not the same as genActualType of + // the type of the lclVar, then we need to change the type of the tree node when loading. + // This situation happens due to "optimizations" that avoid a cast and + // simply retype the node when using long type lclVar as an int. + // While loading the int in that case would work for this use of the lclVar, if it is + // later used as a long, we will have incorrectly truncated the long. + // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero- + // extending load. + + var_types treeType = unspillTree->TypeGet(); + if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad()) + { + assert(!varTypeIsGC(varDsc)); + var_types spillType = genActualType(varDsc->lvType); + unspillTree->gtType = spillType; + inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); + unspillTree->gtType = treeType; + } + else + { + inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); + } +#elif defined(_TARGET_ARM64_) + var_types targetType = unspillTree->gtType; + instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)); + emitAttr attr = emitTypeSize(targetType); + emitter* emit = getEmitter(); + + // Fixes Issue #3326 + attr = emit->emitInsAdjustLoadStoreAttr(ins, attr); + + // Load local variable from its home location. + inst_RV_TT(ins, dstReg, unspillTree, 0, attr); +#else + NYI("Unspilling not implemented for this target architecture."); +#endif + unspillTree->SetInReg(); + + // TODO-Review: We would like to call: + // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); + // instead of the following code, but this ends up hitting this assert: + // assert((regSet.rsMaskVars & regMask) == 0); + // due to issues with LSRA resolution moves. + // So, just force it for now. This probably indicates a condition that creates a GC hole! + // + // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, + // because the variable is not really going live or dead, but that method is somewhat poorly + // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. + // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp. 
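// Editor's note (clarification, not part of this commit): GTF_SPILLED on a node means its value
// was spilled earlier and must be reloaded before this use, while GTF_SPILL means the value must
// be spilled again as soon as it is produced. The check below therefore skips updating the
// variable's register location when this reload is itself immediately followed by another spill.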
+ + // Don't update the variable's location if we are just re-spilling it again. + + if ((unspillTree->gtFlags & GTF_SPILL) == 0) + { + genUpdateVarReg(varDsc, tree); +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); + } +#endif // DEBUG + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + +#ifdef DEBUG + if (compiler->verbose) + { + printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); + varDsc->PrintVarReg(); + printf(" is becoming live "); + compiler->printTreeID(unspillTree); + printf("\n"); + } +#endif // DEBUG + + regSet.AddMaskVars(genGetRegMask(varDsc)); + } + + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } + else if (unspillTree->IsMultiRegCall()) + { + GenTreeCall* call = unspillTree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + GenTreeCopyOrReload* reloadTree = nullptr; + if (tree->OperGet() == GT_RELOAD) + { + reloadTree = tree->AsCopyOrReload(); + } + + // In case of multi-reg call node, GTF_SPILLED flag on it indicates that + // one or more of its result regs are spilled. Call node needs to be + // queried to know which specific result regs to be unspilled. + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILLED) != 0) + { + var_types dstType = retTypeDesc->GetReturnRegType(i); + regNumber unspillTreeReg = call->GetRegNumByIdx(i); + + if (reloadTree != nullptr) + { + dstReg = reloadTree->GetRegNumByIdx(i); + if (dstReg == REG_NA) + { + dstReg = unspillTreeReg; + } + } + else + { + dstReg = unspillTreeReg; + } + + TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i); + getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(), + 0); + compiler->tmpRlsTemp(t); + gcInfo.gcMarkRegPtrVal(dstReg, dstType); + } + } + + unspillTree->gtFlags &= ~GTF_SPILLED; + unspillTree->SetInReg(); + } + else + { + TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); + getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg, + t->tdTempNum(), 0); + compiler->tmpRlsTemp(t); + + unspillTree->gtFlags &= ~GTF_SPILLED; + unspillTree->SetInReg(); + gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); + } + } +} + +//------------------------------------------------------------------------ +// genCopyRegIfNeeded: Copy the given node into the specified register +// +// Arguments: +// node - The node that has been evaluated (consumed). +// needReg - The register in which its value is needed. +// +// Notes: +// This must be a node that has a register. +// +void CodeGen::genCopyRegIfNeeded(GenTree* node, regNumber needReg) +{ + assert((node->gtRegNum != REG_NA) && (needReg != REG_NA)); + if (node->gtRegNum != needReg) + { + inst_RV_RV(INS_mov, needReg, node->gtRegNum, node->TypeGet()); + } +} + +// Do Liveness update for a subnodes that is being consumed by codegen +// including the logic for reload in case is needed and also takes care +// of locating the value on the desired register. 
+void CodeGen::genConsumeRegAndCopy(GenTree* node, regNumber needReg) +{ + if (needReg == REG_NA) + { + return; + } + regNumber treeReg = genConsumeReg(node); + genCopyRegIfNeeded(node, needReg); +} + +// Check that registers are consumed in the right order for the current node being generated. +#ifdef DEBUG +void CodeGen::genNumberOperandUse(GenTree* const operand, int& useNum) const +{ + assert(operand != nullptr); + assert(operand->gtUseNum == -1); + + // Ignore argument placeholders. + if (operand->OperGet() == GT_ARGPLACE) + { + return; + } + + if (!operand->isContained() && !operand->IsCopyOrReload()) + { + operand->gtUseNum = useNum; + useNum++; + } + else + { + for (GenTree* operand : operand->Operands()) + { + genNumberOperandUse(operand, useNum); + } + } +} + +void CodeGen::genCheckConsumeNode(GenTree* const node) +{ + assert(node != nullptr); + + if (verbose) + { + if ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) != 0) + { + printf("Node was consumed twice:\n"); + compiler->gtDispTree(node, nullptr, nullptr, true); + } + else if ((lastConsumedNode != nullptr) && (node->gtUseNum < lastConsumedNode->gtUseNum)) + { + printf("Nodes were consumed out-of-order:\n"); + compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true); + compiler->gtDispTree(node, nullptr, nullptr, true); + } + } + + assert((node->OperGet() == GT_CATCH_ARG) || ((node->gtDebugFlags & GTF_DEBUG_NODE_CG_CONSUMED) == 0)); + assert((lastConsumedNode == nullptr) || (node->gtUseNum == -1) || (node->gtUseNum > lastConsumedNode->gtUseNum)); + + node->gtDebugFlags |= GTF_DEBUG_NODE_CG_CONSUMED; + lastConsumedNode = node; +} +#endif // DEBUG + +//-------------------------------------------------------------------- +// genConsumeReg: Do liveness update for a subnode that is being +// consumed by codegen. +// +// Arguments: +// tree - GenTree node +// +// Return Value: +// Returns the reg number of tree. +// In case of multi-reg call node returns the first reg number +// of the multi-reg return. +regNumber CodeGen::genConsumeReg(GenTree* tree) +{ + if (tree->OperGet() == GT_COPY) + { + genRegCopy(tree); + } + + // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it + // interferes with one of the other sources (or the target, if it's a "delayed use" register)). + // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and + // always using GT_COPY to make the lclVar location explicit. + // Note that we have to do this before calling genUpdateLife because otherwise if we spill it + // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds + // the lclVar (normally when a lclVar is spilled it is then used from its former register + // location, which matches the gtRegNum on the node). + // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded + // because if it's on the stack it will always get reloaded into tree->gtRegNum). 
+ if (genIsRegCandidateLocal(tree)) + { + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; + if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum) + { + inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum); + } + } + + genUnspillRegIfNeeded(tree); + + // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar + genUpdateLife(tree); + + assert(tree->gtHasReg()); + + // there are three cases where consuming a reg means clearing the bit in the live mask + // 1. it was not produced by a local + // 2. it was produced by a local that is going dead + // 3. it was produced by a local that does not live in that reg (like one allocated on the stack) + + if (genIsRegCandidateLocal(tree)) + { + GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); + LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; + assert(varDsc->lvLRACandidate); + + if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum)); + } + else if (varDsc->lvRegNum == REG_STK) + { + // We have loaded this into a register only temporarily + gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + } + } + else + { + gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask()); + } + + genCheckConsumeNode(tree); + return tree->gtRegNum; +} + +// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). +void CodeGen::genConsumeAddress(GenTree* addr) +{ + if (!addr->isContained()) + { + genConsumeReg(addr); + } + else if (addr->OperGet() == GT_LEA) + { + genConsumeAddrMode(addr->AsAddrMode()); + } +} + +// do liveness update for a subnode that is being consumed by codegen +void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) +{ + genConsumeOperands(addr); +} + +void CodeGen::genConsumeRegs(GenTree* tree) +{ +#if !defined(_TARGET_64BIT_) + if (tree->OperGet() == GT_LONG) + { + genConsumeRegs(tree->gtGetOp1()); + genConsumeRegs(tree->gtGetOp2()); + return; + } +#endif // !defined(_TARGET_64BIT_) + + if (tree->isContained()) + { + if (tree->isContainedSpillTemp()) + { + // spill temps are un-tracked and hence no need to update life + } + else if (tree->isIndir()) + { + genConsumeAddress(tree->AsIndir()->Addr()); + } + else if (tree->OperGet() == GT_AND) + { + // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp() + // Now we need to consume the operands of the GT_AND node. + genConsumeOperands(tree->AsOp()); + } +#ifdef _TARGET_XARCH_ + else if (tree->OperGet() == GT_LCL_VAR) + { + // A contained lcl var must be living on stack and marked as reg optional, or not be a + // register candidate. + unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaTable + varNum; + + noway_assert(varDsc->lvRegNum == REG_STK); + noway_assert(tree->IsRegOptional() || !varDsc->lvLRACandidate); + + // Update the life of the lcl var. + genUpdateLife(tree); + } +#endif // _TARGET_XARCH_ + else if (tree->OperIsInitVal()) + { + genConsumeReg(tree->gtGetOp1()); + } + else + { +#ifdef FEATURE_SIMD + // (In)Equality operation that produces bool result, when compared + // against Vector zero, marks its Vector Zero operand as contained. 
+ assert(tree->OperIsLeaf() || tree->IsIntegralConstVector(0)); +#else + assert(tree->OperIsLeaf()); +#endif + } + } + else + { + genConsumeReg(tree); + } +} + +//------------------------------------------------------------------------ +// genConsumeOperands: Do liveness update for the operands of a unary or binary tree +// +// Arguments: +// tree - the GenTreeOp whose operands will have their liveness updated. +// +// Return Value: +// None. +// +// Notes: +// Note that this logic is localized here because we must do the liveness update in +// the correct execution order. This is important because we may have two operands +// that involve the same lclVar, and if one is marked "lastUse" we must handle it +// after the first. + +void CodeGen::genConsumeOperands(GenTreeOp* tree) +{ + GenTree* firstOp = tree->gtOp1; + GenTree* secondOp = tree->gtOp2; + if ((tree->gtFlags & GTF_REVERSE_OPS) != 0) + { + assert(secondOp != nullptr); + firstOp = secondOp; + secondOp = tree->gtOp1; + } + if (firstOp != nullptr) + { + genConsumeRegs(firstOp); + } + if (secondOp != nullptr) + { + genConsumeRegs(secondOp); + } +} + +#if FEATURE_PUT_STRUCT_ARG_STK +//------------------------------------------------------------------------ +// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. +// Also loads in the right register the addresses of the +// src/dst for rep mov operation. +// +// Arguments: +// putArgNode - the PUTARG_STK tree. +// dstReg - the dstReg for the rep move operation. +// srcReg - the srcReg for the rep move operation. +// sizeReg - the sizeReg for the rep move operation. +// +// Return Value: +// None. +// +// Notes: +// sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg +// for copying on the stack a struct with references. +// The source address/offset is determined from the address on the GT_OBJ node, while +// the destination address is the address contained in 'm_stkArgVarNum' plus the offset +// provided in the 'putArgNode'. +// m_stkArgVarNum must be set to the varnum for the local used for placing the "by-value" args on the stack. + +void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, + regNumber dstReg, + regNumber srcReg, + regNumber sizeReg) +{ + assert(varTypeIsStruct(putArgNode)); + + // The putArgNode children are always contained. We should not consume any registers. + assert(putArgNode->gtGetOp1()->isContained()); + + GenTree* dstAddr = putArgNode; + + // Get the source address. + GenTree* src = putArgNode->gtGetOp1(); + assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src)))); + GenTree* srcAddr = src->gtGetOp1(); + + size_t size = putArgNode->getArgSize(); + + assert(dstReg != REG_NA); + assert(srcReg != REG_NA); + + // Consume the registers only if they are not contained or set to REG_NA. + if (srcAddr->gtRegNum != REG_NA) + { + genConsumeReg(srcAddr); + } + + // If the op1 is already in the dstReg - nothing to do. + // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_X86_ + assert(dstReg != REG_SPBASE); + inst_RV_RV(INS_mov, dstReg, REG_SPBASE); +#else // !_TARGET_X86_ + if (dstAddr->gtRegNum != dstReg) + { + // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area + // for tail calls) in RDI. + // Destination is always local (on the stack) - use EA_PTRSIZE. 
+ assert(m_stkArgVarNum != BAD_VAR_NUM); + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, m_stkArgVarNum, putArgNode->getArgOffset()); + } +#endif // !_TARGET_X86_ + + if (srcAddr->gtRegNum != srcReg) + { + if (srcAddr->OperIsLocalAddr()) + { + // The OperLocalAddr is always contained. + assert(srcAddr->isContained()); + GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon(); + + // Generate LEA instruction to load the LclVar address in RSI. + // Source is known to be on the stack. Use EA_PTRSIZE. + unsigned int offset = 0; + if (srcAddr->OperGet() == GT_LCL_FLD_ADDR) + { + offset = srcAddr->AsLclFld()->gtLclOffs; + } + getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset); + } + else + { + assert(srcAddr->gtRegNum != REG_NA); + // Source is not known to be on the stack. Use EA_BYREF. + getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum); + } + } + + if (sizeReg != REG_NA) + { + inst_RV_IV(INS_mov, sizeReg, size, EA_PTRSIZE); + } +} +#endif // FEATURE_PUT_STRUCT_ARG_STK + +//------------------------------------------------------------------------ +// genSetBlockSize: Ensure that the block size is in the given register +// +// Arguments: +// blkNode - The block node +// sizeReg - The register into which the block's size should go +// + +void CodeGen::genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg) +{ + if (sizeReg != REG_NA) + { + unsigned blockSize = blkNode->Size(); + if (blockSize != 0) + { + assert((blkNode->gtRsvdRegs & genRegMask(sizeReg)) != 0); + genSetRegToIcon(sizeReg, blockSize); + } + else + { + noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); + GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize; + if (sizeNode->gtRegNum != sizeReg) + { + inst_RV_RV(INS_mov, sizeReg, sizeNode->gtRegNum, sizeNode->TypeGet()); + } + } + } +} + +//------------------------------------------------------------------------ +// genConsumeBlockSrc: Consume the source address register of a block node, if any. +// +// Arguments: +// blkNode - The block node + +void CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode) +{ + GenTree* src = blkNode->Data(); + if (blkNode->OperIsCopyBlkOp()) + { + // For a CopyBlk we need the address of the source. + if (src->OperGet() == GT_IND) + { + src = src->gtOp.gtOp1; + } + else + { + // This must be a local. + // For this case, there is no source address register, as it is a + // stack-based address. + assert(src->OperIsLocal()); + return; + } + } + else + { + if (src->OperIsInitVal()) + { + src = src->gtGetOp1(); + } + } + genConsumeReg(src); +} + +//------------------------------------------------------------------------ +// genSetBlockSrc: Ensure that the block source is in its allocated register. +// +// Arguments: +// blkNode - The block node +// srcReg - The register in which to set the source (address or init val). +// +void CodeGen::genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg) +{ + GenTree* src = blkNode->Data(); + if (blkNode->OperIsCopyBlkOp()) + { + // For a CopyBlk we need the address of the source. + if (src->OperGet() == GT_IND) + { + src = src->gtOp.gtOp1; + } + else + { + // This must be a local struct. + // Load its address into srcReg. + inst_RV_TT(INS_lea, srcReg, src, 0, EA_BYREF); + return; + } + } + else + { + if (src->OperIsInitVal()) + { + src = src->gtGetOp1(); + } + } + genCopyRegIfNeeded(src, srcReg); +} + +//------------------------------------------------------------------------ +// genConsumeBlockOp: Ensure that the block's operands are enregistered +// as needed. 
+// Arguments: +// blkNode - The block node +// +// Notes: +// This ensures that the operands are consumed in the proper order to +// obey liveness modeling. + +void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) +{ + // We have to consume the registers, and perform any copies, in the actual execution order. + // The nominal order is: dst, src, size. However this may have been changed + // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic + // block size. + // Note that the register allocator ensures that the registers ON THE NODES will not interfere + // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order. + // Further, it ensures that they will not interfere with one another if they are then copied + // to the REQUIRED register (if a fixed register requirement) in execution order. This requires, + // then, that we first consume all the operands, then do any necessary moves. + + GenTree* dstAddr = blkNode->Addr(); + GenTree* src = nullptr; + unsigned blockSize = blkNode->Size(); + GenTree* size = nullptr; + bool evalSizeFirst = true; + + // First, consume all the sources in order + if (blkNode->OperGet() == GT_STORE_DYN_BLK) + { + size = blkNode->AsDynBlk()->gtDynamicSize; + if (blkNode->AsDynBlk()->gtEvalSizeFirst) + { + genConsumeReg(size); + } + else + { + evalSizeFirst = false; + } + } + if (blkNode->IsReverseOp()) + { + + genConsumeBlockSrc(blkNode); + genConsumeReg(dstAddr); + } + else + { + genConsumeReg(dstAddr); + genConsumeBlockSrc(blkNode); + } + if (!evalSizeFirst) + { + noway_assert(size != nullptr); + genConsumeReg(size); + } + + // Next, perform any necessary moves. + if (evalSizeFirst) + { + genSetBlockSize(blkNode, sizeReg); + } + if (blkNode->IsReverseOp()) + { + genSetBlockSrc(blkNode, srcReg); + genCopyRegIfNeeded(dstAddr, dstReg); + } + else + { + genCopyRegIfNeeded(dstAddr, dstReg); + genSetBlockSrc(blkNode, srcReg); + } + if (!evalSizeFirst) + { + genSetBlockSize(blkNode, sizeReg); + } +} + +//------------------------------------------------------------------------- +// genProduceReg: do liveness update for register produced by the current +// node in codegen. +// +// Arguments: +// tree - Gentree node +// +// Return Value: +// None. +void CodeGen::genProduceReg(GenTree* tree) +{ +#ifdef DEBUG + assert((tree->gtDebugFlags & GTF_DEBUG_NODE_CG_PRODUCED) == 0); + tree->gtDebugFlags |= GTF_DEBUG_NODE_CG_PRODUCED; +#endif + + if (tree->gtFlags & GTF_SPILL) + { + // Code for GT_COPY node gets generated as part of consuming regs by its parent. + // A GT_COPY node in turn produces reg result and it should never be marked to + // spill. + // + // Similarly GT_RELOAD node gets generated as part of consuming regs by its + // parent and should never be marked for spilling. + noway_assert(!tree->IsCopyOrReload()); + + if (genIsRegCandidateLocal(tree)) + { + // Store local variable to its home location. + tree->gtFlags &= ~GTF_REG_VAL; + // Ensure that lclVar stores are typed correctly. + unsigned varNum = tree->gtLclVarCommon.gtLclNum; + assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || + (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet()))); + inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum); + } + else + { + // In case of multi-reg call node, spill flag on call node + // indicates that one or more of its allocated regs need to + // be spilled. 
Call node needs to be further queried to + // know which of its result regs needs to be spilled. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + unsigned flags = call->GetRegSpillFlagByIdx(i); + if ((flags & GTF_SPILL) != 0) + { + regNumber reg = call->GetRegNumByIdx(i); + call->SetInReg(); + regSet.rsSpillTree(reg, call, i); + gcInfo.gcMarkRegSetNpt(genRegMask(reg)); + } + } + } + else + { + tree->SetInReg(); + regSet.rsSpillTree(tree->gtRegNum, tree); + gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); + } + + tree->gtFlags |= GTF_SPILLED; + tree->gtFlags &= ~GTF_SPILL; + + return; + } + } + + genUpdateLife(tree); + + // If we've produced a register, mark it as a pointer, as needed. + if (tree->gtHasReg()) + { + // We only mark the register in the following cases: + // 1. It is not a register candidate local. In this case, we're producing a + // register from a local, but the local is not a register candidate. Thus, + // we must be loading it as a temp register, and any "last use" flag on + // the register wouldn't be relevant. + // 2. The register candidate local is going dead. There's no point to mark + // the register as live, with a GC pointer, if the variable is dead. + if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) + { + // Multi-reg call node will produce more than one register result. + // Mark all the regs produced by call node. + if (tree->IsMultiRegCall()) + { + GenTreeCall* call = tree->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + regNumber reg = call->GetRegNumByIdx(i); + var_types type = retTypeDesc->GetReturnRegType(i); + gcInfo.gcMarkRegPtrVal(reg, type); + } + } + else if (tree->IsCopyOrReloadOfMultiRegCall()) + { + // we should never see reload of multi-reg call here + // because GT_RELOAD gets generated in reg consuming path. + noway_assert(tree->OperGet() == GT_COPY); + + // A multi-reg GT_COPY node produces those regs to which + // copy has taken place. 
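// Editor's note (interpretation, not part of this commit): a REG_NA entry returned by
// GetRegNumByIdx on the COPY node below appears to mean that particular return register did not
// need to be moved, so the value stays in the call's original register, which was presumably
// already marked as a GC pointer when the call's result was produced.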
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); + GenTreeCall* call = copy->gtGetOp1()->AsCall(); + ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); + unsigned regCount = retTypeDesc->GetReturnRegCount(); + + for (unsigned i = 0; i < regCount; ++i) + { + var_types type = retTypeDesc->GetReturnRegType(i); + regNumber fromReg = call->GetRegNumByIdx(i); + regNumber toReg = copy->GetRegNumByIdx(i); + + if (toReg != REG_NA) + { + gcInfo.gcMarkRegPtrVal(toReg, type); + } + } + } + else + { + gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); + } + } + } + tree->SetInReg(); +} + +// transfer gc/byref status of src reg to dst reg +void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) +{ + regMaskTP srcMask = genRegMask(src); + regMaskTP dstMask = genRegMask(dst); + + if (gcInfo.gcRegGCrefSetCur & srcMask) + { + gcInfo.gcMarkRegSetGCref(dstMask); + } + else if (gcInfo.gcRegByrefSetCur & srcMask) + { + gcInfo.gcMarkRegSetByref(dstMask); + } + else + { + gcInfo.gcMarkRegSetNpt(dstMask); + } +} + +// generates an ip-relative call or indirect call via reg ('call reg') +// pass in 'addr' for a relative call or 'base' for a indirect register call +// methHnd - optional, only used for pretty printing +// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) +void CodeGen::genEmitCall(int callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize), + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + IL_OFFSETX ilOffset, + regNumber base, + bool isJump, + bool isNoGC) +{ +#if !defined(_TARGET_X86_) + ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) + getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize, + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur, + gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump, + emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd))); +} + +// generates an indirect call via addressing mode (call []) given an indir node +// methHnd - optional, only used for pretty printing +// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) +void CodeGen::genEmitCall(int callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize), + emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), + IL_OFFSETX ilOffset) +{ +#if !defined(_TARGET_X86_) + ssize_t argSize = 0; +#endif // !defined(_TARGET_X86_) + genConsumeAddress(indir->Addr()); + + getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, + argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, + indir->Base() ? indir->Base()->gtRegNum : REG_NA, + indir->Index() ? 
indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset()); +} + +#endif // !LEGACY_BACKEND diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h index fb0d6ea165..406ab779f1 100644 --- a/src/jit/codegenlinear.h +++ b/src/jit/codegenlinear.h @@ -16,6 +16,10 @@ void genCodeForTreeNode(GenTreePtr treeNode); void genCodeForBinary(GenTreePtr treeNode); +#if defined(_TARGET_X86_) +void genCodeForLongUMod(GenTreeOp* node); +#endif // _TARGET_X86_ + void genCodeForDivMod(GenTreeOp* treeNode); void genCodeForMulHi(GenTreeOp* treeNode); @@ -24,6 +28,10 @@ void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTreePtr tree); +#if !defined(_TARGET_64BIT_) +void genLongToIntCast(GenTreePtr treeNode); +#endif + void genIntToIntCast(GenTreePtr treeNode); void genFloatToFloatCast(GenTreePtr treeNode); @@ -36,7 +44,7 @@ void genCkfinite(GenTreePtr treeNode); void genIntrinsic(GenTreePtr treeNode); -void genPutArgStk(GenTreePtr treeNode); +void genPutArgStk(GenTreePutArgStk* treeNode); unsigned getBaseVarForPutArgStk(GenTreePtr treeNode); #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_) @@ -49,7 +57,6 @@ void genCompareInt(GenTreePtr treeNode); #if !defined(_TARGET_64BIT_) void genCompareLong(GenTreePtr treeNode); -void genJTrueLong(GenTreePtr treeNode); #endif #ifdef FEATURE_SIMD @@ -61,7 +68,8 @@ enum SIMDScalarMoveType }; instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr); -void genSIMDScalarMove(var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType); +void genSIMDScalarMove( + var_types targetType, var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType); void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg); void genSIMDIntrinsicInit(GenTreeSIMD* simdNode); void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); @@ -87,7 +95,10 @@ void genSIMDCheck(GenTree* treeNode); void genStoreIndTypeSIMD12(GenTree* treeNode); void genStoreLclFldTypeSIMD12(GenTree* treeNode); void genLoadIndTypeSIMD12(GenTree* treeNode); -void genLoadLclFldTypeSIMD12(GenTree* treeNode); +void genLoadLclTypeSIMD12(GenTree* treeNode); +#ifdef _TARGET_X86_ +void genPutArgStkSIMD12(GenTree* treeNode); +#endif // _TARGET_X86_ #endif // FEATURE_SIMD #if !defined(_TARGET_64BIT_) @@ -104,6 +115,7 @@ void genUnspillRegIfNeeded(GenTree* tree); regNumber genConsumeReg(GenTree* tree); +void genCopyRegIfNeeded(GenTree* tree, regNumber needReg); void genConsumeRegAndCopy(GenTree* tree, regNumber needReg); void genConsumeIfReg(GenTreePtr tree) @@ -122,15 +134,14 @@ void genConsumeAddress(GenTree* addr); void genConsumeAddrMode(GenTreeAddrMode* mode); -void genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg); -void genConsumeBlockDst(GenTreeBlk* blkNode); -GenTree* genConsumeBlockSrc(GenTreeBlk* blkNode); +void genSetBlockSize(GenTreeBlk* blkNode, regNumber sizeReg); +void genConsumeBlockSrc(GenTreeBlk* blkNode); +void genSetBlockSrc(GenTreeBlk* blkNode, regNumber srcReg); void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING -void genConsumePutStructArgStk( - GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum); -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK +void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); +#endif // 
FEATURE_PUT_STRUCT_ARG_STK void genConsumeRegs(GenTree* tree); @@ -142,6 +153,10 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl void genCodeForShift(GenTreePtr tree); +#if defined(_TARGET_X86_) +void genCodeForShiftLong(GenTreePtr tree); +#endif + #ifdef _TARGET_XARCH_ void genCodeForShiftRMW(GenTreeStoreInd* storeInd); #endif // _TARGET_XARCH_ @@ -154,12 +169,23 @@ void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode); void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode); -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING -void genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum); +#ifdef FEATURE_PUT_STRUCT_ARG_STK +#ifdef _TARGET_X86_ +bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk); +void genPushReg(var_types type, regNumber srcReg); +void genPutArgStkFieldList(GenTreePutArgStk* putArgStk); +#endif // _TARGET_X86_ + +void genPutStructArgStk(GenTreePutArgStk* treeNode); -void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum); -void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum); -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +int genMove8IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset); +int genMove4IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset); +int genMove2IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset); +int genMove1IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset); +void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode); +void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode); +void genStoreRegToStackArg(var_types type, regNumber reg, int offset); +#endif // FEATURE_PUT_STRUCT_ARG_STK void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset); @@ -191,6 +217,14 @@ void genCallInstruction(GenTreePtr call); void genJmpMethod(GenTreePtr jmp); +BasicBlock* genCallFinally(BasicBlock* block, BasicBlock* lblk); + +#if FEATURE_EH_FUNCLETS +void genEHCatchRet(BasicBlock* block); +#else // !FEATURE_EH_FUNCLETS +void genEHFinallyOrFilterRet(BasicBlock* block); +#endif // !FEATURE_EH_FUNCLETS + void genMultiRegCallStoreToLocal(GenTreePtr treeNode); // Deals with codegen for muti-register struct returns. 
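// Editor's note (not part of this commit): the 'baseVarNum' parameter dropped from
// genConsumePutStructArgStk, genPutStructArgStk and the genStructPutArg* helpers in the hunks
// above is superseded by the 'm_stkArgVarNum' / 'm_stkArgOffset' members that the next hunk adds
// to CodeGen; the codegenlinear.cpp changes earlier in this diff assert that m_stkArgVarNum is
// set before it is used.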
@@ -212,9 +246,19 @@ bool genIsRegCandidateLocal(GenTreePtr tree) return (varDsc->lvIsRegCandidate()); } +#ifdef FEATURE_PUT_STRUCT_ARG_STK +#ifdef _TARGET_X86_ +bool m_pushStkArg; +#else // !_TARGET_X86_ +unsigned m_stkArgVarNum; +unsigned m_stkArgOffset; +#endif // !_TARGET_X86_ +#endif // !FEATURE_PUT_STRUCT_ARG_STK + #ifdef DEBUG GenTree* lastConsumedNode; -void genCheckConsumeNode(GenTree* treeNode); +void genNumberOperandUse(GenTree* const operand, int& useNum) const; +void genCheckConsumeNode(GenTree* const node); #else // !DEBUG inline void genCheckConsumeNode(GenTree* treeNode) { diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index a41c28695b..8e0af48799 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -24,114 +24,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfo.h" #include "gcinfoencoder.h" -// Get the register assigned to the given node - -regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) -{ - return tree->gtRegNum; -} - -//------------------------------------------------------------------------ -// genSpillVar: Spill a local variable -// -// Arguments: -// tree - the lclVar node for the variable being spilled -// -// Return Value: -// None. -// -// Assumptions: -// The lclVar must be a register candidate (lvRegCandidate) - -void CodeGen::genSpillVar(GenTreePtr tree) -{ - unsigned varNum = tree->gtLclVarCommon.gtLclNum; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - assert(varDsc->lvIsRegCandidate()); - - // We don't actually need to spill if it is already living in memory - bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); - if (needsSpill) - { - var_types lclTyp = varDsc->TypeGet(); - if (varDsc->lvNormalizeOnStore()) - { - lclTyp = genActualType(lclTyp); - } - emitAttr size = emitTypeSize(lclTyp); - - bool restoreRegVar = false; - if (tree->gtOper == GT_REG_VAR) - { - tree->SetOper(GT_LCL_VAR); - restoreRegVar = true; - } - - // mask off the flag to generate the right spill code, then bring it back - tree->gtFlags &= ~GTF_REG_VAL; - - instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); -#if CPU_LONG_USES_REGPAIR - if (varTypeIsMultiReg(tree)) - { - assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); - assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); - regNumber regLo = genRegPairLo(tree->gtRegPair); - regNumber regHi = genRegPairHi(tree->gtRegPair); - inst_TT_RV(storeIns, tree, regLo); - inst_TT_RV(storeIns, tree, regHi, 4); - } - else -#endif - { - assert(varDsc->lvRegNum == tree->gtRegNum); - inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); - } - tree->gtFlags |= GTF_REG_VAL; - - if (restoreRegVar) - { - tree->SetOper(GT_REG_VAR); - } - - genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); - gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); - - if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) - { -#ifdef DEBUG - if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); - } - else - { - JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); - } -#endif - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - } - } - - tree->gtFlags &= ~GTF_SPILL; - varDsc->lvRegNum = REG_STK; - if (varTypeIsMultiReg(tree)) - { - varDsc->lvOtherReg = REG_STK; - } -} - -// inline -void 
CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree) -{ - assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); - varDsc->lvRegNum = tree->gtRegNum; -} - -/*****************************************************************************/ -/*****************************************************************************/ - /***************************************************************************** * * Generate code that will set the given register to the integer constant. @@ -231,6 +123,8 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) } regNumber regGSCheck; + regMaskTP regMaskGSCheck = RBM_NONE; + if (!pushReg) { // Non-tail call: we can use any callee trash register that is not @@ -251,8 +145,11 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) else { #ifdef _TARGET_X86_ - NYI_X86("Tail calls from methods that need GS check"); - regGSCheck = REG_NA; + // It doesn't matter which register we pick, since we're going to save and restore it + // around the check. + // TODO-CQ: Can we optimize the choice of register to avoid doing the push/pop sometimes? + regGSCheck = REG_EAX; + regMaskGSCheck = RBM_EAX; #else // !_TARGET_X86_ // Tail calls from methods that need GS check: We need to preserve registers while // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie @@ -287,8 +184,13 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) #endif // !_TARGET_X86_ } + regMaskTP byrefPushedRegs = RBM_NONE; + regMaskTP norefPushedRegs = RBM_NONE; + regMaskTP pushedRegs = RBM_NONE; + if (compiler->gsGlobalSecurityCookieAddr == nullptr) { +#if defined(_TARGET_AMD64_) // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'. // Otherwise, load the value into a reg and use 'cmp mem64, reg64'. if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal) @@ -297,7 +199,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); } else +#endif // defined(_TARGET_AMD64_) { + assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, (int)compiler->gsGlobalSecurityCookieVal); } @@ -305,6 +209,9 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) else { // Ngen case - GS cookie value needs to be accessed through an indirection. + + pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs); + instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); @@ -315,821 +222,180 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) inst_JMP(jmpEqual, gsCheckBlk); genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); genDefineTempLabel(gsCheckBlk); -} -/***************************************************************************** - * - * Generate code for all the basic blocks in the function. 
- */ + genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs); +} -void CodeGen::genCodeForBBlist() +BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk) { - unsigned varNum; - LclVarDsc* varDsc; - - unsigned savedStkLvl; - -#ifdef DEBUG - genInterruptibleUsed = true; - - // You have to be careful if you create basic blocks from now on - compiler->fgSafeBasicBlockCreation = false; - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnCall) - { - compiler->opts.compStackCheckOnCall = false; - } - - // This stress mode is not comptible with fully interruptible GC - if (genInterruptible && compiler->opts.compStackCheckOnRet) - { - compiler->opts.compStackCheckOnRet = false; - } -#endif // DEBUG - - // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. - genPrepForEHCodegen(); - - assert(!compiler->fgFirstBBScratch || - compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. - - /* Initialize the spill tracking logic */ - - regSet.rsSpillBeg(); - -#ifdef DEBUGGING_SUPPORT - /* Initialize the line# tracking logic */ +#if FEATURE_EH_FUNCLETS + // Generate a call to the finally, like this: + // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym + // call finally-funclet + // jmp finally-return // Only for non-retless finally calls + // The jmp can be a NOP if we're going to the next block. + // If we're generating code for the main function (not a funclet), and there is no localloc, + // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP + // instead of loading the PSPSym in this case, or if PSPSym is not used (CoreRT ABI). - if (compiler->opts.compScopeInfo) + if ((compiler->lvaPSPSym == BAD_VAR_NUM) || + (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) { - siInit(); + inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); } -#endif - - // The current implementation of switch tables requires the first block to have a label so it - // can generate offsets to the switch label targets. - // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this. - if (compiler->fgHasSwitch) + else { - compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; + getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); } + getEmitter()->emitIns_J(INS_call, block->bbJumpDest); - genPendingCallLabel = nullptr; - - /* Initialize the pointer tracking code */ - - gcInfo.gcRegPtrSetInit(); - gcInfo.gcVarPtrSetInit(); - - /* If any arguments live in registers, mark those regs as such */ - - for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++) + if (block->bbFlags & BBF_RETLESS_CALL) { - /* Is this variable a parameter assigned to a register? */ - - if (!varDsc->lvIsParam || !varDsc->lvRegister) - { - continue; - } + // We have a retless call, and the last instruction generated was a call. + // If the next block is in a different EH region (or is the end of the code + // block), then we need to generate a breakpoint here (since it will never + // get executed) to get proper unwind behavior. - /* Is the argument live on entry to the method? */ - - if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) + if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { - continue; - } - - /* Is this a floating-point argument? 
*/ - - if (varDsc->IsFloatRegType()) - { - continue; + instGen(INS_BREAKPOINT); // This should never get executed } - - noway_assert(!varTypeIsFloating(varDsc->TypeGet())); - - /* Mark the register as holding the variable */ - - regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); } - - unsigned finallyNesting = 0; - - // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without - // allocation at the start of each basic block. - VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); - - /*------------------------------------------------------------------------- - * - * Walk the basic blocks and generate code for each one - * - */ - - BasicBlock* block; - BasicBlock* lblk; /* previous block */ - - for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) + else { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\n=============== Generating "); - block->dspBlockHeader(compiler, true, true); - compiler->fgDispBBLiveness(block); - } -#endif // DEBUG - - // Figure out which registers hold variables on entry to this block - - regSet.ClearMaskVars(); - gcInfo.gcRegGCrefSetCur = RBM_NONE; - gcInfo.gcRegByrefSetCur = RBM_NONE; - - compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); - - genUpdateLife(block->bbLiveIn); - - // Even if liveness didn't change, we need to update the registers containing GC references. - // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't - // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change - // here. That would require handling the changes in recordVarLocationsAtStartOfBB(). 
- - regMaskTP newLiveRegSet = RBM_NONE; - regMaskTP newRegGCrefSet = RBM_NONE; - regMaskTP newRegByrefSet = RBM_NONE; -#ifdef DEBUG - VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); - VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); -#endif - VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); - while (iter.NextElem(compiler, &varIndex)) - { - unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; - LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); - - if (varDsc->lvIsInReg()) - { - newLiveRegSet |= varDsc->lvRegMask(); - if (varDsc->lvType == TYP_REF) - { - newRegGCrefSet |= varDsc->lvRegMask(); - } - else if (varDsc->lvType == TYP_BYREF) - { - newRegByrefSet |= varDsc->lvRegMask(); - } -#ifdef DEBUG - if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) - { - VarSetOps::AddElemD(compiler, removedGCVars, varIndex); - } -#endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); - } - else if (compiler->lvaIsGCTracked(varDsc)) - { -#ifdef DEBUG - if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) - { - VarSetOps::AddElemD(compiler, addedGCVars, varIndex); - } -#endif // DEBUG - VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); - } - } - - regSet.rsMaskVars = newLiveRegSet; - -#ifdef DEBUG - if (compiler->verbose) - { - if (!VarSetOps::IsEmpty(compiler, addedGCVars)) - { - printf("\t\t\t\t\t\t\tAdded GCVars: "); - dumpConvertedVarSet(compiler, addedGCVars); - printf("\n"); - } - if (!VarSetOps::IsEmpty(compiler, removedGCVars)) - { - printf("\t\t\t\t\t\t\tRemoved GCVars: "); - dumpConvertedVarSet(compiler, removedGCVars); - printf("\n"); - } - } -#endif // DEBUG - - gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true)); - gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true)); - - /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to - represent the exception object (TYP_REF). - We mark REG_EXCEPTION_OBJECT as holding a GC object on entry - to the block, it will be the first thing evaluated - (thanks to GTF_ORDER_SIDEEFF). 
- */ - - if (handlerGetsXcptnObj(block->bbCatchTyp)) - { - for (GenTree* node : LIR::AsRange(block)) - { - if (node->OperGet() == GT_CATCH_ARG) - { - gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); - break; - } - } - } - - /* Start a new code output block */ - - genUpdateCurrentFunclet(block); - - if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) - { - getEmitter()->emitLoopAlign(); - } - -#ifdef DEBUG - if (compiler->opts.dspCode) - { - printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); - } -#endif - - block->bbEmitCookie = nullptr; - - if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL)) - { - /* Mark a label and update the current set of live GC refs */ - - block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, - gcInfo.gcRegByrefSetCur, FALSE); - } - - if (block == compiler->fgFirstColdBlock) - { -#ifdef DEBUG - if (compiler->verbose) - { - printf("\nThis is the start of the cold region of the method\n"); - } -#endif - // We should never have a block that falls through into the Cold section - noway_assert(!lblk->bbFallsThrough()); - - // We require the block that starts the Cold section to have a label - noway_assert(block->bbEmitCookie); - getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); - } - - /* Both stacks are always empty on entry to a basic block */ - - genStackLevel = 0; - - savedStkLvl = genStackLevel; - - /* Tell everyone which basic block we're working on */ - - compiler->compCurBB = block; - -#ifdef DEBUGGING_SUPPORT - siBeginBlock(block); - - // BBF_INTERNAL blocks don't correspond to any single IL instruction. - if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && - !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to - // emit a NO_MAPPING entry, immediately after the prolog. - { - genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true); - } - - bool firstMapping = true; -#endif // DEBUGGING_SUPPORT - - /*--------------------------------------------------------------------- - * - * Generate code for each statement-tree in the block - * - */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#if FEATURE_EH_FUNCLETS - if (block->bbFlags & BBF_FUNCLET_BEG) - { - genReserveFuncletProlog(block); - } -#endif // FEATURE_EH_FUNCLETS - - // Clear compCurStmt and compCurLifeTree. - compiler->compCurStmt = nullptr; - compiler->compCurLifeTree = nullptr; - - // Traverse the block in linear order, generating code for each node as we - // as we encounter it. - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUGGING_SUPPORT - IL_OFFSETX currentILOffset = BAD_IL_OFFSET; -#endif - for (GenTree* node : LIR::AsRange(block).NonPhiNodes()) - { -#ifdef DEBUGGING_SUPPORT - // Do we have a new IL offset? 
- if (node->OperGet() == GT_IL_OFFSET) - { - genEnsureCodeEmitted(currentILOffset); - currentILOffset = node->gtStmt.gtStmtILoffsx; - genIPmappingAdd(currentILOffset, firstMapping); - firstMapping = false; - } -#endif // DEBUGGING_SUPPORT - -#ifdef DEBUG - if (node->OperGet() == GT_IL_OFFSET) - { - noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || - node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); - - if (compiler->opts.dspCode && compiler->opts.dspInstrs && - node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) - { - while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs) - { - genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); - } - } - } -#endif // DEBUG - - genCodeForTreeNode(node); - if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse) - { - genConsumeReg(node); - } - } // end for each node in block - -#ifdef DEBUG - // The following set of register spill checks and GC pointer tracking checks used to be - // performed at statement boundaries. Now, with LIR, there are no statements, so they are - // performed at the end of each block. - // TODO: could these checks be performed more frequently? E.g., at each location where - // the register allocator says there are no live non-variable registers. Perhaps this could - // be done by (a) keeping a running count of live non-variable registers by using - // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively, - // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA - // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node, - // there will be no live non-variable registers. - - regSet.rsSpillChk(); - - /* Make sure we didn't bungle pointer register tracking */ - - regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur; - regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; - - // If return is a GC-type, clear it. Note that if a common - // epilog is generated (genReturnBB) it has a void return - // even though we might return a ref. 
We can't use the compRetType - // as the determiner because something we are tracking as a byref - // might be used as a return value of a int function (which is legal) - GenTree* blockLastNode = block->lastNode(); - if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) && - (varTypeIsGC(compiler->info.compRetType) || - (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet())))) - { - nonVarPtrRegs &= ~RBM_INTRET; - } - - if (nonVarPtrRegs) - { - printf("Regset after BB%02u gcr=", block->bbNum); - printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); - printf(", byr="); - printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); - printf(", regVars="); - printRegMaskInt(regSet.rsMaskVars); - compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); - printf("\n"); - } - - noway_assert(nonVarPtrRegs == RBM_NONE); -#endif // DEBUG - -#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) - if (block->bbNext == nullptr) - { - // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block - // (it's as good as any, but better than the prolog, which can only be a single instruction - // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler - // thinks the instructions are the same as we do. - genAmd64EmitterUnitTests(); - } -#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_) - -#ifdef DEBUGGING_SUPPORT - // It is possible to reach the end of the block without generating code for the current IL offset. - // For example, if the following IR ends the current block, no code will have been generated for - // offset 21: - // - // ( 0, 0) [000040] ------------ il_offset void IL offset: 21 - // - // N001 ( 0, 0) [000039] ------------ nop void - // - // This can lead to problems when debugging the generated code. To prevent these issues, make sure - // we've generated code for the last IL offset we saw in the block. - genEnsureCodeEmitted(currentILOffset); + // Because of the way the flowgraph is connected, the liveness info for this one instruction + // after the call is not (can not be) correct in cases where a variable has a last use in the + // handler. So turn off GC reporting for this single instruction. + getEmitter()->emitDisableGC(); - if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) + // Now go to where the finally funclet needs to return to. + if (block->bbNext->bbJumpDest == block->bbNext->bbNext) { - siEndBlock(block); - - /* Is this the last block, and are there any open scopes left ? */ - - bool isLastBlockProcessed = (block->bbNext == nullptr); - if (block->isBBCallAlwaysPair()) - { - isLastBlockProcessed = (block->bbNext->bbNext == nullptr); - } - - if (isLastBlockProcessed && siOpenScopeList.scNext) - { - /* This assert no longer holds, because we may insert a throw - block to demarcate the end of a try or finally region when they - are at the end of the method. It would be nice if we could fix - our code so that this throw block will no longer be necessary. */ - - // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); - - siCloseAllOpenScopes(); - } + // Fall-through. + // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly + // to the next instruction? 
This would depend on stack walking from within the finally + // handler working without this instruction being in this special EH region. + instGen(INS_nop); } - -#endif // DEBUGGING_SUPPORT - - genStackLevel -= savedStkLvl; - -#ifdef DEBUG - // compCurLife should be equal to the liveOut set, except that we don't keep - // it up to date for vars that are not register candidates - // (it would be nice to have a xor set function) - - VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); - VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); - VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); - while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) + else { - unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; - LclVarDsc* varDsc = compiler->lvaTable + varNum; - assert(!varDsc->lvIsRegCandidate()); + inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); } -#endif - - /* Both stacks should always be empty on exit from a basic block */ - noway_assert(genStackLevel == 0); - -#ifdef _TARGET_AMD64_ - // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several - // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack - // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. - // The document "X64 and ARM ABIs.docx" has more details. The situations: - // 1. If the call instruction is in a different EH region as the instruction that follows it. - // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might - // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters - // here.) - // We handle case #1 here, and case #2 in the emitter. - if (getEmitter()->emitIsLastInsCall()) - { - // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? - // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, - // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions - // generated before the OS epilog starts, such as a GS cookie check. - if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) - { - // We only need the NOP if we're not going to generate any more code as part of the block end. - - switch (block->bbJumpKind) - { - case BBJ_ALWAYS: - case BBJ_THROW: - case BBJ_CALLFINALLY: - case BBJ_EHCATCHRET: - // We're going to generate more code below anyway, so no need for the NOP. - - case BBJ_RETURN: - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - // These are the "epilog follows" case, handled in the emitter. - - break; - - case BBJ_NONE: - if (block->bbNext == nullptr) - { - // Call immediately before the end of the code; we should never get here . - instGen(INS_BREAKPOINT); // This should never get executed - } - else - { - // We need the NOP - instGen(INS_nop); - } - break; - - case BBJ_COND: - case BBJ_SWITCH: - // These can't have a call as the last instruction! - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; - } - } - } -#endif // _TARGET_AMD64_ - - /* Do we need to generate a jump or return? 
*/ - - switch (block->bbJumpKind) - { - case BBJ_ALWAYS: - inst_JMP(EJ_jmp, block->bbJumpDest); - break; - - case BBJ_RETURN: - genExitCode(block); - break; - - case BBJ_THROW: - // If we have a throw at the end of a function or funclet, we need to emit another instruction - // afterwards to help the OS unwinder determine the correct context during unwind. - // We insert an unexecuted breakpoint instruction in several situations - // following a throw instruction: - // 1. If the throw is the last instruction of the function or funclet. This helps - // the OS unwinder determine the correct context during an unwind from the - // thrown exception. - // 2. If this is this is the last block of the hot section. - // 3. If the subsequent block is a special throw block. - // 4. On AMD64, if the next block is in a different EH region. - if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || - !BasicBlock::sameEHRegion(block, block->bbNext) || - (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || - block->bbNext == compiler->fgFirstColdBlock) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - - break; - - case BBJ_CALLFINALLY: - -#if FEATURE_EH_FUNCLETS - - // Generate a call to the finally, like this: - // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym - // call finally-funclet - // jmp finally-return // Only for non-retless finally calls - // The jmp can be a NOP if we're going to the next block. - // If we're generating code for the main function (not a funclet), and there is no localloc, - // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP - // instead of loading the PSPSym in this case. - if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)) - { - inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL); - } - else - { - getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); - } - getEmitter()->emitIns_J(INS_call, block->bbJumpDest); + getEmitter()->emitEnableGC(); + } - if (block->bbFlags & BBF_RETLESS_CALL) - { - // We have a retless call, and the last instruction generated was a call. - // If the next block is in a different EH region (or is the end of the code - // block), then we need to generate a breakpoint here (since it will never - // get executed) to get proper unwind behavior. +#else // !FEATURE_EH_FUNCLETS - if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) - { - instGen(INS_BREAKPOINT); // This should never get executed - } - } - else - { - // Because of the way the flowgraph is connected, the liveness info for this one instruction - // after the call is not (can not be) correct in cases where a variable has a last use in the - // handler. So turn off GC reporting for this single instruction. - getEmitter()->emitDisableGC(); + // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot + // corresponding to the finally's nesting level. When invoked in response to an exception, the + // EE does this. + // + // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS. + // + // We will emit : + // mov [ebp - (n + 1)], 0 + // mov [ebp - n ], 0xFC + // push &step + // jmp finallyBlock + // ... + // step: + // mov [ebp - n ], 0 + // jmp leaveTarget + // ... 
+ // leaveTarget: + + noway_assert(isFramePointerUsed()); + + // Get the nesting level which contains the finally + unsigned finallyNesting = 0; + compiler->fgGetNestingLevel(block, &finallyNesting); - // Now go to where the finally funclet needs to return to. - if (block->bbNext->bbJumpDest == block->bbNext->bbNext) - { - // Fall-through. - // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly - // to the next instruction? This would depend on stack walking from within the finally - // handler working without this instruction being in this special EH region. - instGen(INS_nop); - } - else - { - inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); - } + // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) + unsigned filterEndOffsetSlotOffs; + filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); - getEmitter()->emitEnableGC(); - } + unsigned curNestingSlotOffs; + curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); -#else // !FEATURE_EH_FUNCLETS + // Zero out the slot for the next nesting level + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs - TARGET_POINTER_SIZE); + instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar, + curNestingSlotOffs); - // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot - // corresponding to the finally's nesting level. When invoked in response to an exception, the - // EE does this. - // - // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS. - // - // We will emit : - // mov [ebp - (n + 1)], 0 - // mov [ebp - n ], 0xFC - // push &step - // jmp finallyBlock - // ... - // step: - // mov [ebp - n ], 0 - // jmp leaveTarget - // ... - // leaveTarget: - - noway_assert(isFramePointerUsed()); - - // Get the nesting level which contains the finally - compiler->fgGetNestingLevel(block, &finallyNesting); - - // The last slot is reserved for ICodeManager::FixContext(ppEndRegion) - unsigned filterEndOffsetSlotOffs; - filterEndOffsetSlotOffs = - (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE); - - unsigned curNestingSlotOffs; - curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE)); - - // Zero out the slot for the next nesting level - instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, - curNestingSlotOffs - TARGET_POINTER_SIZE); - instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar, - curNestingSlotOffs); - - // Now push the address where the finally funclet should return to directly. - if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); - } - else - { - // EE expects a DWORD, so we give him 0 - inst_IV(INS_push_hide, 0); - } + // Now push the address where the finally funclet should return to directly. 
+ if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); + getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest); + } + else + { + // EE expects a DWORD, so we give him 0 + inst_IV(INS_push_hide, 0); + } - // Jump to the finally BB - inst_JMP(EJ_jmp, block->bbJumpDest); + // Jump to the finally BB + inst_JMP(EJ_jmp, block->bbJumpDest); #endif // !FEATURE_EH_FUNCLETS - // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the - // jump target using bbJumpDest - that is already used to point - // to the finally block. So just skip past the BBJ_ALWAYS unless the - // block is RETLESS. - if (!(block->bbFlags & BBF_RETLESS_CALL)) - { - assert(block->isBBCallAlwaysPair()); - - lblk = block; - block = block->bbNext; - } + // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the + // jump target using bbJumpDest - that is already used to point + // to the finally block. So just skip past the BBJ_ALWAYS unless the + // block is RETLESS. + if (!(block->bbFlags & BBF_RETLESS_CALL)) + { + assert(block->isBBCallAlwaysPair()); - break; + lblk = block; + block = block->bbNext; + } + return block; +} #if FEATURE_EH_FUNCLETS - - case BBJ_EHCATCHRET: - // Set RAX to the address the VM should return to after the catch. - // Generate a RIP-relative - // lea reg, [rip + disp32] ; the RIP is implicit - // which will be position-indepenent. - getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); - __fallthrough; - - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - genReserveFuncletEpilog(block); - break; +void CodeGen::genEHCatchRet(BasicBlock* block) +{ + // Set RAX to the address the VM should return to after the catch. + // Generate a RIP-relative + // lea reg, [rip + disp32] ; the RIP is implicit + // which will be position-indepenent. + getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET); +} #else // !FEATURE_EH_FUNCLETS - case BBJ_EHCATCHRET: - noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86 - - case BBJ_EHFINALLYRET: - case BBJ_EHFILTERRET: - { - // The last statement of the block must be a GT_RETFILT, which has already been generated. - assert(block->lastNode() != nullptr); - assert(block->lastNode()->OperGet() == GT_RETFILT); - - if (block->bbJumpKind == BBJ_EHFINALLYRET) - { - assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally - - // Return using a pop-jmp sequence. As the "try" block calls - // the finally with a jmp, this leaves the x86 call-ret stack - // balanced in the normal flow of path. - - noway_assert(isFramePointerRequired()); - inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); - inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); - } - else - { - assert(block->bbJumpKind == BBJ_EHFILTERRET); - - // The return value has already been computed. 
- instGen_Return(0); - } - } - break; - -#endif // !FEATURE_EH_FUNCLETS - - case BBJ_NONE: - case BBJ_COND: - case BBJ_SWITCH: - break; - - default: - noway_assert(!"Unexpected bbJumpKind"); - break; - } - -#ifdef DEBUG - compiler->compCurBB = nullptr; -#endif - - } //------------------ END-FOR each block of the method ------------------- - - /* Nothing is live at this point */ - genUpdateLife(VarSetOps::MakeEmpty(compiler)); - - /* Finalize the spill tracking logic */ - - regSet.rsSpillEnd(); - - /* Finalize the temp tracking logic */ - - compiler->tmpEnd(); +void CodeGen::genEHFinallyOrFilterRet(BasicBlock* block) +{ + // The last statement of the block must be a GT_RETFILT, which has already been generated. + assert(block->lastNode() != nullptr); + assert(block->lastNode()->OperGet() == GT_RETFILT); -#ifdef DEBUG - if (compiler->verbose) + if (block->bbJumpKind == BBJ_EHFINALLYRET) { - printf("\n# "); - printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, - compiler->compSizeEstimate); - printf("%s\n", compiler->info.compFullName); - } -#endif -} + assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally -// return the child that has the same reg as the dst (if any) -// other child returned (out param) in 'other' -GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/) -{ - if (tree->gtRegNum == REG_NA) - { - other = nullptr; - return nullptr; - } + // Return using a pop-jmp sequence. As the "try" block calls + // the finally with a jmp, this leaves the x86 call-ret stack + // balanced in the normal flow of path. - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtOp.gtOp2; - if (op1->gtRegNum == tree->gtRegNum) - { - other = op2; - return op1; - } - if (op2->gtRegNum == tree->gtRegNum) - { - other = op1; - return op2; + noway_assert(isFramePointerRequired()); + inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL); + inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL); } else { - other = nullptr; - return nullptr; + assert(block->bbJumpKind == BBJ_EHFILTERRET); + + // The return value has already been computed. 
+ instGen_Return(0); } } +#endif // !FEATURE_EH_FUNCLETS + // Move an immediate value into an integer register void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) @@ -1227,7 +493,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { - assert(!(treeNode->gtFlags & GTF_UNSIGNED)); + if (treeNode->OperGet() == GT_MULHI) + { + assert(!(treeNode->gtFlags & GTF_UNSIGNED)); + } assert(!treeNode->gtOverflowEx()); regNumber targetReg = treeNode->gtRegNum; @@ -1247,8 +516,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) GenTree* rmOp = op2; // Set rmOp to the contained memory operand (if any) - // - if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg))) + if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == REG_RAX))) { regOp = op2; rmOp = op1; @@ -1256,25 +524,131 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) assert(!regOp->isContained()); // Setup targetReg when neither of the source operands was a matching register - if (regOp->gtRegNum != targetReg) + if (regOp->gtRegNum != REG_RAX) { - inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType); + inst_RV_RV(ins_Copy(targetType), REG_RAX, regOp->gtRegNum, targetType); } - emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp); + instruction ins; + if ((treeNode->gtFlags & GTF_UNSIGNED) == 0) + { + ins = INS_imulEAX; + } + else + { + ins = INS_mulEAX; + } + emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary - if (targetReg != REG_RDX) + if (treeNode->OperGet() == GT_MULHI && targetReg != REG_RDX) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } } -// generate code for a DIV or MOD operation +#ifdef _TARGET_X86_ +//------------------------------------------------------------------------ +// genCodeForLongUMod: Generate code for a tree of the form +// `(umod (gt_long x y) (const int))` +// +// Arguments: +// node - the node for which to generate code +// +void CodeGen::genCodeForLongUMod(GenTreeOp* node) +{ + assert(node != nullptr); + assert(node->OperGet() == GT_UMOD); + assert(node->TypeGet() == TYP_INT); + + GenTreeOp* const dividend = node->gtOp1->AsOp(); + assert(dividend->OperGet() == GT_LONG); + assert(varTypeIsLong(dividend)); + + genConsumeOperands(node); + + GenTree* const dividendLo = dividend->gtOp1; + GenTree* const dividendHi = dividend->gtOp2; + assert(!dividendLo->isContained()); + assert(!dividendHi->isContained()); + + GenTree* const divisor = node->gtOp2; + assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); + assert(!divisor->gtSkipReloadOrCopy()->isContained()); + assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2); + assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff); + + // dividendLo must be in RAX; dividendHi must be in RDX + genCopyRegIfNeeded(dividendLo, REG_EAX); + genCopyRegIfNeeded(dividendHi, REG_EDX); + + // At this point, EAX:EDX contains the 64bit dividend and op2->gtRegNum + // contains the 32bit divisor. We want to generate the following code: + // + // cmp edx, divisor->gtRegNum + // jb noOverflow + // + // mov temp, eax + // mov eax, edx + // xor edx, edx + // div divisor->gtRegNum + // mov eax, temp + // + // noOverflow: + // div divisor->gtRegNum + // + // This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c. 
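To make the identity above concrete: a 32-bit DIV with EDX:EAX as the dividend faults when the high half (EDX) is greater than or equal to the divisor, because the quotient would not fit in 32 bits. The emitted sequence therefore reduces the high half modulo the divisor first, which the identity shows leaves the remainder unchanged. A minimal portable C++ sketch of the same computation (the helper name UMod64By32 is purely illustrative and is not part of this change):

    #include <cstdint>

    // Overflow-avoiding 64-by-32 unsigned modulo, mirroring the emitted sequence.
    // 'hi' plays the role of EDX and 'lo' the role of EAX.
    static uint32_t UMod64By32(uint64_t dividend, uint32_t divisor)
    {
        uint32_t lo = static_cast<uint32_t>(dividend);        // EAX
        uint32_t hi = static_cast<uint32_t>(dividend >> 32);  // EDX

        if (hi >= divisor)                                    // cmp edx, divisor / jb noOverflow
        {
            // (a * 2^32 + b) % c == ((a % c) * 2^32 + b) % c, so reducing the
            // high half first does not change the final remainder.
            hi %= divisor;                                    // xor edx, edx / div divisor
        }

        // Now hi < divisor, so the single 32-bit DIV below cannot overflow;
        // its remainder (EDX) is the result.
        return static_cast<uint32_t>(((static_cast<uint64_t>(hi) << 32) | lo) % divisor);
    }

UMod64By32 agrees with the direct 64-bit remainder for any nonzero 32-bit divisor; the asserts in the function below additionally restrict the divisor to a constant in [2, 0x3fffffff].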
+ + BasicBlock* const noOverflow = genCreateTempLabel(); + + // cmp edx, divisor->gtRegNum + // jb noOverflow + inst_RV_RV(INS_cmp, REG_EDX, divisor->gtRegNum); + inst_JMP(EJ_jb, noOverflow); + + // mov temp, eax + // mov eax, edx + // xor edx, edx + // div divisor->gtRegNum + // mov eax, temp + const regNumber tempReg = genRegNumFromMask(node->gtRsvdRegs); + inst_RV_RV(INS_mov, tempReg, REG_EAX, TYP_INT); + inst_RV_RV(INS_mov, REG_EAX, REG_EDX, TYP_INT); + instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); + inst_RV(INS_div, divisor->gtRegNum, TYP_INT); + inst_RV_RV(INS_mov, REG_EAX, tempReg, TYP_INT); + + // noOverflow: + // div divisor->gtRegNum + genDefineTempLabel(noOverflow); + inst_RV(INS_div, divisor->gtRegNum, TYP_INT); + + const regNumber targetReg = node->gtRegNum; + if (targetReg != REG_EDX) + { + inst_RV_RV(INS_mov, targetReg, REG_RDX, TYP_INT); + } + genProduceReg(node); +} +#endif // _TARGET_X86_ + +//------------------------------------------------------------------------ +// genCodeForDivMod: Generate code for a DIV or MOD operation. +// +// Arguments: +// treeNode - the node to generate the code for // void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) { - GenTree* dividend = treeNode->gtOp1; + GenTree* dividend = treeNode->gtOp1; +#ifdef _TARGET_X86_ + if (varTypeIsLong(dividend->TypeGet())) + { + genCodeForLongUMod(treeNode); + return; + } +#endif // _TARGET_X86_ + GenTree* divisor = treeNode->gtOp2; genTreeOps oper = treeNode->OperGet(); emitAttr size = emitTypeSize(treeNode); @@ -1319,10 +693,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) else { // dividend must be in RAX - if (dividend->gtRegNum != REG_RAX) - { - inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType); - } + genCopyRegIfNeeded(dividend, REG_RAX); // zero or sign extend rax to rdx if (oper == GT_UMOD || oper == GT_UDIV) @@ -1395,7 +766,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB); #else // !defined(_TARGET_64BIT_) assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI || - oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI || + oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_LONG || oper == GT_DIV_HI || oper == GT_MOD_HI || oper == GT_ADD || oper == GT_SUB); #endif // !defined(_TARGET_64BIT_) @@ -1443,7 +814,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode) } // now we know there are 3 different operands so attempt to use LEA else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags - && (op2->isContainedIntOrIImmed() || !op2->isContained())) + && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags()) { if (op2->isContainedIntOrIImmed()) { @@ -1833,7 +1204,7 @@ void CodeGen::genReturn(GenTreePtr treeNode) // // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN: // In flowgraph and other places assert that the last node of a block marked as - // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to + // BBJ_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to // maintain such an invariant irrespective of whether profiler hook needed or not. // Also, there is not much to be gained by materializing it as an explicit node. 
if (compiler->compCurBB == compiler->genReturnBB) @@ -1913,9 +1284,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) switch (treeNode->gtOper) { +#ifndef JIT32_GCENCODER case GT_START_NONGC: getEmitter()->emitDisableGC(); break; +#endif // !defined(JIT32_GCENCODER) case GT_PROF_HOOK: #ifdef PROFILING_SUPPORTED @@ -1996,14 +1369,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // genCodeForShift() calls genProduceReg() break; - case GT_CAST: #if !defined(_TARGET_64BIT_) - // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not - // yet support casting FROM a long type either, and that's simpler to catch - // here. - NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG"); -#endif // !defined(_TARGET_64BIT_) + case GT_LSH_HI: + case GT_RSH_LO: + // TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't + // need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to + // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as + // contained memory-op. Even if not a memory-op, we could mark it as reg-optional. + genCodeForShiftLong(treeNode); + break; +#endif + case GT_CAST: if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) { // Casts float/double <--> double/float @@ -2037,7 +1414,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) { - assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); + assert(treeNode->InReg() || (treeNode->gtFlags & GTF_SPILLED)); } // If this is a register candidate that has been spilled, genConsumeReg() will @@ -2047,6 +1424,15 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { assert(!isRegCandidate); +#if defined(FEATURE_SIMD) && defined(_TARGET_X86_) + // Loading of TYP_SIMD12 (i.e. Vector3) variable + if (treeNode->TypeGet() == TYP_SIMD12) + { + genLoadLclTypeSIMD12(treeNode); + break; + } +#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_) + emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0); genProduceReg(treeNode); @@ -2075,7 +1461,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // Loading of TYP_SIMD12 (i.e. Vector3) field if (treeNode->TypeGet() == TYP_SIMD12) { - genLoadLclFldTypeSIMD12(treeNode); + genLoadLclTypeSIMD12(treeNode); break; } #endif @@ -2243,6 +1629,9 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_MULHI: +#ifdef _TARGET_X86_ + case GT_MUL_LONG: +#endif genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); break; @@ -2408,18 +1797,18 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) // X86 Long comparison else if (varTypeIsLong(op1Type)) { - // When not materializing the result in a register, the compare logic is generated - // when we generate the GT_JTRUE. - if (treeNode->gtRegNum != REG_NA) - { - genCompareLong(treeNode); - } - else - { - // We generate the compare when we generate the GT_JTRUE, but we need to consume - // the operands now. - genConsumeOperands(treeNode->AsOp()); - } +#ifdef DEBUG + // The result of an unlowered long compare on a 32-bit target must either be + // a) materialized into a register, or + // b) unused. + // + // A long compare that has a result that is used but not materialized into a register should + // have been handled by Lowering::LowerCompare. 
+ + LIR::Use use; + assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use)); +#endif + genCompareLong(treeNode); } #endif // !defined(_TARGET_64BIT_) else @@ -2437,52 +1826,60 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) assert(compiler->compCurBB->bbJumpKind == BBJ_COND); #if !defined(_TARGET_64BIT_) - // For long compares, we emit special logic - if (varTypeIsLong(cmp->gtGetOp1())) - { - genJTrueLong(cmp); - } - else + // Long-typed compares should have been handled by Lowering::LowerCompare. + assert(!varTypeIsLong(cmp->gtGetOp1())); #endif - { - // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp - // is governed by a flag NOT by the inherent type of the node - // TODO-XArch-CQ: Check if we can use the currently set flags. - emitJumpKind jumpKind[2]; - bool branchToTrueLabel[2]; - genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); - BasicBlock* skipLabel = nullptr; - if (jumpKind[0] != EJ_NONE) - { - BasicBlock* jmpTarget; - if (branchToTrueLabel[0]) - { - jmpTarget = compiler->compCurBB->bbJumpDest; - } - else - { - // This case arises only for ordered GT_EQ right now - assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); - skipLabel = genCreateTempLabel(); - jmpTarget = skipLabel; - } - - inst_JMP(jumpKind[0], jmpTarget); - } + // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp + // is governed by a flag NOT by the inherent type of the node + // TODO-XArch-CQ: Check if we can use the currently set flags. + emitJumpKind jumpKind[2]; + bool branchToTrueLabel[2]; + genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); - if (jumpKind[1] != EJ_NONE) + BasicBlock* skipLabel = nullptr; + if (jumpKind[0] != EJ_NONE) + { + BasicBlock* jmpTarget; + if (branchToTrueLabel[0]) { - // the second conditional branch always has to be to the true label - assert(branchToTrueLabel[1]); - inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); + jmpTarget = compiler->compCurBB->bbJumpDest; } - - if (skipLabel != nullptr) + else { - genDefineTempLabel(skipLabel); + // This case arises only for ordered GT_EQ right now + assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); + skipLabel = genCreateTempLabel(); + jmpTarget = skipLabel; } + + inst_JMP(jumpKind[0], jmpTarget); + } + + if (jumpKind[1] != EJ_NONE) + { + // the second conditional branch always has to be to the true label + assert(branchToTrueLabel[1]); + inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); } + + if (skipLabel != nullptr) + { + genDefineTempLabel(skipLabel); + } + } + break; + + case GT_JCC: + { + GenTreeJumpCC* jcc = treeNode->AsJumpCC(); + + assert(compiler->compCurBB->bbJumpKind == BBJ_COND); + + CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? 
CK_UNSIGNED : CK_SIGNED; + emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind); + + inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest); } break; @@ -2572,12 +1969,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) break; case GT_LIST: + case GT_FIELD_LIST: case GT_ARGPLACE: // Nothing to do break; case GT_PUTARG_STK: - genPutArgStk(treeNode); + genPutArgStk(treeNode->AsPutArgStk()); break; case GT_PUTARG_REG: @@ -2608,7 +2006,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) case GT_LOCKADD: case GT_XCHG: case GT_XADD: - genLockedInstructions(treeNode); + genLockedInstructions(treeNode->AsOp()); break; case GT_MEMORYBARRIER: @@ -2795,7 +2193,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { #ifdef DEBUG char message[256]; - sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); + _snprintf_s(message, _countof(message), _TRUNCATE, "Unimplemented node type %s\n", + GenTree::NodeName(treeNode->OperGet())); #endif assert(!"Unknown node in codegen"); } @@ -3330,8 +2729,10 @@ ALLOC_DONE: BAILOUT: // Write the lvaLocAllocSPvar stack frame slot - noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); - getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM) + { + getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); + } #if STACK_PROBES if (compiler->opts.compNeedStackProbes) @@ -3356,10 +2757,15 @@ BAILOUT: void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) { +#ifdef JIT32_GCENCODER + assert(!storeBlkNode->gtBlkOpGcUnsafe); +#else if (storeBlkNode->gtBlkOpGcUnsafe) { getEmitter()->emitDisableGC(); } +#endif // JIT32_GCENCODER + bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp(); switch (storeBlkNode->gtBlkOpKind) @@ -3399,23 +2805,40 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) default: unreached(); } + +#ifndef JIT32_GCENCODER if (storeBlkNode->gtBlkOpGcUnsafe) { getEmitter()->emitEnableGC(); } +#endif // !defined(JIT32_GCENCODER) } -// Generate code for InitBlk using rep stos. +// +//------------------------------------------------------------------------ +// genCodeForInitBlkRepStos: Generate code for InitBlk using rep stos. +// +// Arguments: +// initBlkNode - The Block store for which we are generating code. +// // Preconditions: -// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes. -// Any value larger than that, we'll use the helper even if both the -// fill byte and the size are integer constants. +// On x64: +// The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes. +// Any value larger than that, we'll use the helper even if both the fill byte and the +// size are integer constants. +// On x86: +// The size must either be a non-constant or less than INITBLK_STOS_LIMIT bytes. +// void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode) { - // Make sure we got the arguments of the initblk/initobj operation in the right registers + // Make sure we got the arguments of the initblk/initobj operation in the right registers. 
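For context on what the rep stos based path ultimately emits: with RDI holding the destination, RCX the count, and AL the fill byte, rep stosb stores that byte RCX times, advancing RDI after each store. A rough C++ equivalent of the instruction's effect (a sketch for illustration only, not the code this function generates):

    #include <cstddef>
    #include <cstdint>

    // Approximate behavior of "rep stosb" with dst in RDI, val in AL, size in RCX.
    static void RepStosbEquivalent(uint8_t* dst, uint8_t val, size_t size)
    {
        for (size_t i = 0; i < size; ++i)
        {
            dst[i] = val;   // each iteration stores AL and advances RDI
        }
    }

This is why the helper below only needs the destination address, the fill value, and the size to land in those fixed registers before issuing the instruction.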
unsigned size = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } #ifdef DEBUG assert(!dstAddr->isContained()); @@ -3428,7 +2851,8 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode) #ifdef _TARGET_AMD64_ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); #else - assert(size > CPBLK_UNROLL_LIMIT); + // Note that a size of zero means a non-constant size. + assert((size == 0) || (size > CPBLK_UNROLL_LIMIT)); #endif } @@ -3449,9 +2873,13 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) unsigned size = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } assert(!dstAddr->isContained()); - assert(!initVal->isContained()); + assert(!initVal->isContained() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0))); assert(size != 0); assert(size <= INITBLK_UNROLL_LIMIT); assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI()); @@ -3512,9 +2940,11 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode) offset += 4; emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); offset += 4; -#else // !_TARGET_X86_ +#else // !_TARGET_X86_ + emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset); offset += 8; + #endif // !_TARGET_X86_ } if ((size & 4) != 0) @@ -3544,6 +2974,10 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode) unsigned blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Addr(); GenTreePtr initVal = initBlkNode->Data(); + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } assert(!dstAddr->isContained()); assert(!initVal->isContained()); @@ -3760,21 +3194,145 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode) instGen(INS_r_movsb); } -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK +//------------------------------------------------------------------------ +// CodeGen::genMove8IfNeeded: Conditionally move 8 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// longTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (8 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register. +// This is checked by genStoreRegToStackArg. 
+// +int CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset) +{ +#ifdef _TARGET_X86_ + instruction longMovIns = INS_movq; +#else // !_TARGET_X86_ + instruction longMovIns = INS_mov; +#endif // !_TARGET_X86_ + if ((size & 8) != 0) + { + genCodeForLoadOffset(longMovIns, EA_8BYTE, longTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_LONG, longTmpReg, offset); + return 8; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove4IfNeeded: Conditionally move 4 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (4 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. +// +int CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + if ((size & 4) != 0) + { + genCodeForLoadOffset(INS_mov, EA_4BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_INT, intTmpReg, offset); + return 4; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove2IfNeeded: Conditionally move 2 bytes of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (2 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. +// +int CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + if ((size & 2) != 0) + { + genCodeForLoadOffset(INS_mov, EA_2BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_SHORT, intTmpReg, offset); + return 2; + } + return 0; +} + +//------------------------------------------------------------------------ +// CodeGen::genMove1IfNeeded: Conditionally move 1 byte of a struct to the argument area +// +// Arguments: +// size - The size of bytes remaining to be moved +// intTmpReg - The tmp register to be used for the long value +// srcAddr - The address of the source struct +// offset - The current offset being copied +// +// Return Value: +// Returns the number of bytes moved (1 or 0). +// +// Notes: +// This is used in the PutArgStkKindUnroll case, to move any bytes that are +// not an even multiple of 16. +// intTmpReg must be an integer register. +// This is checked by genStoreRegToStackArg. 
+// +int CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset) +{ + + if ((size & 1) != 0) + { + genCodeForLoadOffset(INS_mov, EA_1BYTE, intTmpReg, srcAddr, offset); + genStoreRegToStackArg(TYP_BYTE, intTmpReg, offset); + return 1; + } + return 0; +} //---------------------------------------------------------------------------------------------------------------// // genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling. // // Arguments: // putArgNode - the PutArgStk tree. -// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack. +// +// Notes: +// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct will be copied to the +// stack. // // TODO-Amd64-Unix: Try to share code with copyblk. // Need refactoring of copyblk before it could be used for putarg_stk. // The difference for now is that a putarg_stk contains its children, while cpyblk does not. // This creates differences in code. After some significant refactoring it could be reused. // -void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum) +void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode) { // We will never call this method for SIMD types, which are stored directly // in genPutStructArgStk(). @@ -3801,14 +3359,43 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV unsigned offset = 0; + regNumber xmmTmpReg = REG_NA; + regNumber intTmpReg = REG_NA; + regNumber longTmpReg = REG_NA; +#ifdef _TARGET_X86_ + // On x86 we use an XMM register for both 16 and 8-byte chunks, but if it's + // less than 16 bytes, we will just be using pushes + if (size >= 8) + { + xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + longTmpReg = xmmTmpReg; + } + if ((size & 0x7) != 0) + { + intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + } +#else // !_TARGET_X86_ + // On x64 we use an XMM register only for 16-byte chunks. + if (size >= XMM_REGSIZE_BYTES) + { + xmmTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); + } + if ((size & 0xf) != 0) + { + intTmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); + longTmpReg = intTmpReg; + } +#endif // !_TARGET_X86_ + // If the size of this struct is larger than 16 bytes // let's use SSE2 to be able to do 16 byte at a time // loads and stores. if (size >= XMM_REGSIZE_BYTES) { +#ifdef _TARGET_X86_ + assert(!m_pushStkArg); +#endif // _TARGET_X86_ assert(putArgNode->gtRsvdRegs != RBM_NONE); - regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT); - assert(genIsValidFloatReg(xmmReg)); size_t slots = size / XMM_REGSIZE_BYTES; assert(putArgNode->gtGetOp1()->isContained()); @@ -3820,11 +3407,10 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV while (slots-- > 0) { // Load - genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(), - offset); // Load the address of the child of the Obj node. + genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src->gtGetOp1(), offset); // Store - emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset); + genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset); offset += XMM_REGSIZE_BYTES; } @@ -3833,41 +3419,29 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV // Fill the remainder (15 bytes or less) if there's one. 
if ((size & 0xf) != 0) { - // Grab the integer temp register to emit the remaining loads and stores. - regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT); - assert(genIsValidIntReg(tmpReg)); - - if ((size & 8) != 0) - { - genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset); - - emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset); - - offset += 8; - } - - if ((size & 4) != 0) - { - genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset); - - emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset); - - offset += 4; - } - - if ((size & 2) != 0) - { - genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset); - - emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset); - - offset += 2; +#ifdef _TARGET_X86_ + if (m_pushStkArg) + { + // This case is currently supported only for the case where the total size is + // less than XMM_REGSIZE_BYTES. We need to push the remaining chunks in reverse + // order. However, morph has ensured that we have a struct that is an even + // multiple of TARGET_POINTER_SIZE, so we don't need to worry about alignment. + assert(((size & 0xc) == size) && (offset == 0)); + // If we have a 4 byte chunk, load it from either offset 0 or 8, depending on + // whether we've got an 8 byte chunk, and then push it on the stack. + unsigned pushedBytes = genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, size & 0x8); + // Now if we have an 8 byte chunk, load it from offset 0 (it's the first chunk) + // and push it on the stack. + pushedBytes += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, 0); } - - if ((size & 1) != 0) + else +#endif // _TARGET_X86_ { - genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset); - emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset); + offset += genMove8IfNeeded(size, longTmpReg, src->gtOp.gtOp1, offset); + offset += genMove4IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset); + offset += genMove2IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset); + offset += genMove1IfNeeded(size, intTmpReg, src->gtOp.gtOp1, offset); + assert(offset == size); } } } @@ -3877,17 +3451,16 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseV // // Arguments: // putArgNode - the PutArgStk tree. -// baseVarNum - the base var number, relative to which the by-val struct bits will go. // // Preconditions: // The size argument of the PutArgStk (for structs) is a constant and is between // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. +// m_stkArgVarNum must be set to the base var number, relative to which the by-val struct bits will go. 
// -void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum) +void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode) { assert(putArgNode->TypeGet() == TYP_STRUCT); assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT); - assert(baseVarNum != BAD_VAR_NUM); // Make sure we got the arguments of the cpblk operation in the right registers GenTreePtr dstAddr = putArgNode; @@ -3897,7 +3470,7 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI)); assert(srcAddr->isContained()); - genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum); + genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX); instGen(INS_r_movsb); } @@ -3906,12 +3479,14 @@ void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned base // must be cleared to zeroes. The native compiler doesn't clear the upper bits // and there is no way to know if the caller is native or not. So, the upper // 32 bits of Vector argument on stack are always cleared to zero. -#ifdef FEATURE_SIMD +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD) void CodeGen::genClearStackVec3ArgUpperBits() { #ifdef DEBUG if (verbose) + { printf("*************** In genClearStackVec3ArgUpperBits()\n"); + } #endif assert(compiler->compGeneratingProlog); @@ -3948,12 +3523,13 @@ void CodeGen::genClearStackVec3ArgUpperBits() } } } -#endif // FEATURE_SIMD -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD) +#endif // FEATURE_PUT_STRUCT_ARG_STK // Generate code for CpObj nodes wich copy structs that have interleaved // GC pointers. -// This will generate a sequence of movsq instructions for the cases of non-gc members +// This will generate a sequence of movsp instructions for the cases of non-gc members. +// Note that movsp is an alias for movsd on x86 and movsq on x64. // and calls to the BY_REF_ASSIGN helper otherwise. void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { @@ -3961,6 +3537,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) GenTreePtr dstAddr = cpObjNode->Addr(); GenTreePtr source = cpObjNode->Data(); GenTreePtr srcAddr = nullptr; + var_types srcAddrType = TYP_BYREF; bool sourceIsLocal = false; assert(source->isContained()); @@ -3973,24 +3550,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { noway_assert(source->IsLocal()); sourceIsLocal = true; - // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree. - // OR: transform source to GT_IND(GT_LCL_VAR_ADDR) - if (source->OperGet() == GT_LCL_VAR) - { - source->SetOper(GT_LCL_VAR_ADDR); - } - else - { - assert(source->OperGet() == GT_LCL_FLD); - source->SetOper(GT_LCL_FLD_ADDR); - } - srcAddr = source; } bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG - bool isRepMovsqUsed = false; + bool isRepMovspUsed = false; assert(!dstAddr->isContained()); @@ -3998,44 +3563,40 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) // with CpObj, so this requires special logic. assert(cpObjNode->gtGcPtrCount > 0); - // MovSq instruction is used for copying non-gcref fields and it needs - // src = RSI and dst = RDI. + // MovSp (alias for movsq on x64 and movsd on x86) instruction is used for copying non-gcref fields + // and it needs src = RSI and dst = RDI. // Either these registers must not contain lclVars, or they must be dying or marked for spill. 
// This is because these registers are incremented as we go through the struct. - GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); - GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); - unsigned srcLclVarNum = BAD_VAR_NUM; - unsigned dstLclVarNum = BAD_VAR_NUM; - bool isSrcAddrLiveOut = false; - bool isDstAddrLiveOut = false; - if (genIsRegCandidateLocal(actualSrcAddr)) - { - srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum; - isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); - } - if (genIsRegCandidateLocal(actualDstAddr)) - { - dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum; - isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); - } - assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut || - ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); - assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut || - ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); + if (!sourceIsLocal) + { + GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); + GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); + unsigned srcLclVarNum = BAD_VAR_NUM; + unsigned dstLclVarNum = BAD_VAR_NUM; + bool isSrcAddrLiveOut = false; + bool isDstAddrLiveOut = false; + if (genIsRegCandidateLocal(actualSrcAddr)) + { + srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum; + isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + if (genIsRegCandidateLocal(actualDstAddr)) + { + dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum; + isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); + assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); + srcAddrType = srcAddr->TypeGet(); + } #endif // DEBUG - // Consume these registers. + // Consume the operands and get them into the right registers. // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). - if (sourceIsLocal) - { - inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF); - genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA); - } - else - { - genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA); - } - gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet()); + genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA); + gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); unsigned slots = cpObjNode->gtSlots; @@ -4046,23 +3607,23 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) { #ifdef DEBUG - // If the destination of the CpObj is on the stack - // make sure we allocated RCX to emit rep movsq. - regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); - assert(tmpReg == REG_RCX); - isRepMovsqUsed = true; + // If the destination of the CpObj is on the stack, make sure we allocated + // RCX to emit the movsp (alias for movsd or movsq for 32 and 64 bits respectively). + assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); + regNumber tmpReg = REG_RCX; + isRepMovspUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); - instGen(INS_r_movsq); + instGen(INS_r_movsp); } else { - // For small structs, it's better to emit a sequence of movsq than to - // emit a rep movsq instruction. 
+ // For small structs, it's better to emit a sequence of movsp than to + // emit a rep movsp instruction. while (slots > 0) { - instGen(INS_movsq); + instGen(INS_movsp); slots--; } } @@ -4078,7 +3639,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // Let's see if we can use rep movsq instead of a sequence of movsq instructions + // Let's see if we can use rep movsp instead of a sequence of movsp instructions // to save cycles and code size. { unsigned nonGcSlotCount = 0; @@ -4090,12 +3651,12 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); // If we have a very small contiguous non-gc region, it's better just to - // emit a sequence of movsq instructions + // emit a sequence of movsp instructions if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { while (nonGcSlotCount > 0) { - instGen(INS_movsq); + instGen(INS_movsp); nonGcSlotCount--; } } @@ -4103,13 +3664,13 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) { #ifdef DEBUG // Otherwise, we can save code-size and improve CQ by emitting - // rep movsq - regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); - assert(tmpReg == REG_RCX); - isRepMovsqUsed = true; + // rep movsp (alias for movsd/movsq for x86/x64) + assert((cpObjNode->gtRsvdRegs & RBM_RCX) != 0); + regNumber tmpReg = REG_RCX; + isRepMovspUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); - instGen(INS_r_movsq); + instGen(INS_r_movsp); } } break; @@ -4235,7 +3796,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD -void CodeGen::genLockedInstructions(GenTree* treeNode) +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) { GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; @@ -4244,11 +3805,6 @@ void CodeGen::genLockedInstructions(GenTree* treeNode) regNumber addrReg = addr->gtRegNum; instruction ins; - // all of these nodes implicitly do an indirection on op1 - // so create a temporary node to feed into the pattern matching - GenTreeIndir i = indirForm(data->TypeGet(), addr); - genConsumeReg(addr); - // The register allocator should have extended the lifetime of the address // so that it is not used as the target. 
noway_assert(addrReg != targetReg); @@ -4258,7 +3814,7 @@ void CodeGen::genLockedInstructions(GenTree* treeNode) assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0); - genConsumeIfReg(data); + genConsumeOperands(treeNode); if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg) { inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg); @@ -4284,6 +3840,10 @@ void CodeGen::genLockedInstructions(GenTree* treeNode) default: unreached(); } + + // all of these nodes implicitly do an indirection on op1 + // so create a temporary node to feed into the pattern matching + GenTreeIndir i = indirForm(data->TypeGet(), addr); getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data); if (treeNode->gtRegNum != REG_NA) @@ -4459,22 +4019,22 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) GenTreePtr arrObj = arrOffset->gtArrObj; regNumber tgtReg = arrOffset->gtRegNum; - - noway_assert(tgtReg != REG_NA); + assert(tgtReg != REG_NA); unsigned dim = arrOffset->gtCurrDim; unsigned rank = arrOffset->gtArrRank; var_types elemType = arrOffset->gtArrElemType; - // We will use a temp register for the offset*scale+effectiveIndex computation. - regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; - regNumber tmpReg = genRegNumFromMask(tmpRegMask); - // First, consume the operands in the correct order. regNumber offsetReg = REG_NA; + regNumber tmpReg = REG_NA; if (!offsetNode->IsIntegralConst(0)) { offsetReg = genConsumeReg(offsetNode); + + // We will use a temp register for the offset*scale+effectiveIndex computation. + regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; + tmpReg = genRegNumFromMask(tmpRegMask); } else { @@ -4495,6 +4055,9 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) if (!offsetNode->IsIntegralConst(0)) { + assert(tmpReg != REG_NA); + assert(arrReg != REG_NA); + // Evaluate tgtReg = offsetReg*dim_size + indexReg. // tmpReg is used to load dim_size and the result of the multiplication. // Note that dim_size will never be negative. @@ -4617,6 +4180,12 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_SUB_HI: ins = INS_sbb; break; + case GT_LSH_HI: + ins = INS_shld; + break; + case GT_RSH_LO: + ins = INS_shrd; + break; #endif // !defined(_TARGET_64BIT_) default: unreached(); @@ -4654,6 +4223,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree) regNumber operandReg = operand->gtRegNum; GenTreePtr shiftBy = tree->gtGetOp2(); + if (shiftBy->isContainedIntOrIImmed()) { // First, move the operand to the destination register and @@ -4672,12 +4242,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree) // We must have the number of bits to shift stored in ECX, since we constrained this node to // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single // register destination requirement. - regNumber shiftReg = shiftBy->gtRegNum; - if (shiftReg != REG_RCX) - { - // Issue the mov to RCX: - inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet()); - } + genCopyRegIfNeeded(shiftBy, REG_RCX); // The operand to be shifted must not be in ECX noway_assert(operandReg != REG_RCX); @@ -4692,6 +4257,67 @@ void CodeGen::genCodeForShift(GenTreePtr tree) genProduceReg(tree); } +#ifdef _TARGET_X86_ +//------------------------------------------------------------------------ +// genCodeForShiftLong: Generates the code sequence for a GenTree node that +// represents a three operand bit shift or rotate operation (<<Hi, >>Lo). 
+// +// Arguments: +// tree - the bit shift node (that specifies the type of bit shift to perform). +// +// Assumptions: +// a) All GenTrees are register allocated. +// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant +// +void CodeGen::genCodeForShiftLong(GenTreePtr tree) +{ + // Only the non-RMW case here. + genTreeOps oper = tree->OperGet(); + assert(oper == GT_LSH_HI || oper == GT_RSH_LO); + + GenTree* operand = tree->gtOp.gtOp1; + assert(operand->OperGet() == GT_LONG); + assert(!operand->gtOp.gtOp1->isContained()); + assert(!operand->gtOp.gtOp2->isContained()); + + GenTree* operandLo = operand->gtGetOp1(); + GenTree* operandHi = operand->gtGetOp2(); + + regNumber regLo = operandLo->gtRegNum; + regNumber regHi = operandHi->gtRegNum; + + genConsumeOperands(tree->AsOp()); + + var_types targetType = tree->TypeGet(); + instruction ins = genGetInsForOper(oper, targetType); + + GenTreePtr shiftBy = tree->gtGetOp2(); + + assert(shiftBy->isContainedIntOrIImmed()); + + unsigned int count = shiftBy->AsIntConCommon()->IconValue(); + + regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo; + + if (regResult != tree->gtRegNum) + { + inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType); + } + + if (oper == GT_LSH_HI) + { + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regLo, count); + } + else + { + assert(oper == GT_RSH_LO); + inst_RV_RV_IV(ins, emitTypeSize(targetType), tree->gtRegNum, regHi, count); + } + + genProduceReg(tree); +} +#endif + //------------------------------------------------------------------------ // genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that // represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example: @@ -4739,182 +4365,13 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd) // sit in ECX. In case this didn't happen, LSRA expects the code generator to move it since it's a single // register destination requirement. regNumber shiftReg = shiftBy->gtRegNum; - if (shiftReg != REG_RCX) - { - // Issue the mov to RCX: - inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet()); - } + genCopyRegIfNeeded(shiftBy, REG_RCX); // The shiftBy operand is implicit, so call the unary version of emitInsRMW. getEmitter()->emitInsRMW(ins, attr, storeInd); } } -void CodeGen::genUnspillRegIfNeeded(GenTree* tree) -{ - regNumber dstReg = tree->gtRegNum; - GenTree* unspillTree = tree; - - if (tree->gtOper == GT_RELOAD) - { - unspillTree = tree->gtOp.gtOp1; - } - - if ((unspillTree->gtFlags & GTF_SPILLED) != 0) - { - if (genIsRegCandidateLocal(unspillTree)) - { - // Reset spilled flag, since we are going to load a local variable from its home location. - unspillTree->gtFlags &= ~GTF_SPILLED; - - GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; - - // Load local variable from its home location. - // In most cases the tree type will indicate the correct type to use for the load. - // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets - // widened when loaded into a register), and its size is not the same as genActualType of - // the type of the lclVar, then we need to change the type of the tree node when loading. - // This situation happens due to "optimizations" that avoid a cast and - // simply retype the node when using long type lclVar as an int. 
- // While loading the int in that case would work for this use of the lclVar, if it is - // later used as a long, we will have incorrectly truncated the long. - // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero- - // extending load. - - var_types treeType = unspillTree->TypeGet(); - if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad()) - { - assert(!varTypeIsGC(varDsc)); - var_types spillType = genActualType(varDsc->lvType); - unspillTree->gtType = spillType; - inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); - unspillTree->gtType = treeType; - } - else - { - inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); - } - - unspillTree->SetInReg(); - - // TODO-Review: We would like to call: - // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); - // instead of the following code, but this ends up hitting this assert: - // assert((regSet.rsMaskVars & regMask) == 0); - // due to issues with LSRA resolution moves. - // So, just force it for now. This probably indicates a condition that creates a GC hole! - // - // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, - // because the variable is not really going live or dead, but that method is somewhat poorly - // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. - // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp. - - // Don't update the variable's location if we are just re-spilling it again. - - if ((unspillTree->gtFlags & GTF_SPILL) == 0) - { - genUpdateVarReg(varDsc, tree); -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) - { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); - } -#endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); - -#ifdef DEBUG - if (compiler->verbose) - { - printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); - varDsc->PrintVarReg(); - printf(" is becoming live "); - compiler->printTreeID(unspillTree); - printf("\n"); - } -#endif // DEBUG - - regSet.AddMaskVars(genGetRegMask(varDsc)); - } - - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); - } - else if (unspillTree->IsMultiRegCall()) - { - GenTreeCall* call = unspillTree->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - GenTreeCopyOrReload* reloadTree = nullptr; - if (tree->OperGet() == GT_RELOAD) - { - reloadTree = tree->AsCopyOrReload(); - } - - // In case of multi-reg call node, GTF_SPILLED flag on it indicates that - // one or more of its result regs are spilled. Call node needs to be - // queried to know which specific result regs to be unspilled. 
- for (unsigned i = 0; i < regCount; ++i) - { - unsigned flags = call->GetRegSpillFlagByIdx(i); - if ((flags & GTF_SPILLED) != 0) - { - var_types dstType = retTypeDesc->GetReturnRegType(i); - regNumber unspillTreeReg = call->GetRegNumByIdx(i); - - if (reloadTree != nullptr) - { - dstReg = reloadTree->GetRegNumByIdx(i); - if (dstReg == REG_NA) - { - dstReg = unspillTreeReg; - } - } - else - { - dstReg = unspillTreeReg; - } - - TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i); - getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(), - 0); - compiler->tmpRlsTemp(t); - gcInfo.gcMarkRegPtrVal(dstReg, dstType); - } - } - - unspillTree->gtFlags &= ~GTF_SPILLED; - unspillTree->SetInReg(); - } - else - { - TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); - getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg, - t->tdTempNum(), 0); - compiler->tmpRlsTemp(t); - - unspillTree->gtFlags &= ~GTF_SPILLED; - unspillTree->SetInReg(); - gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); - } - } -} - -// Do Liveness update for a subnodes that is being consumed by codegen -// including the logic for reload in case is needed and also takes care -// of locating the value on the desired register. -void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg) -{ - if (needReg == REG_NA) - { - return; - } - regNumber treeReg = genConsumeReg(tree); - if (treeReg != needReg) - { - inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet()); - } -} - void CodeGen::genRegCopy(GenTree* treeNode) { assert(treeNode->OperGet() == GT_COPY); @@ -5022,662 +4479,6 @@ void CodeGen::genRegCopy(GenTree* treeNode) genProduceReg(treeNode); } -// Check that registers are consumed in the right order for the current node being generated. -#ifdef DEBUG -void CodeGen::genCheckConsumeNode(GenTree* treeNode) -{ - // GT_PUTARG_REG is consumed out of order. - if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG) - { - if (lastConsumedNode != nullptr) - { - if (treeNode == lastConsumedNode) - { - if (verbose) - { - printf("Node was consumed twice:\n "); - compiler->gtDispTree(treeNode, nullptr, nullptr, true); - } - } - else - { - if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum)) - { - printf("Nodes were consumed out-of-order:\n"); - compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true); - compiler->gtDispTree(treeNode, nullptr, nullptr, true); - } - // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum); - } - } - lastConsumedNode = treeNode; - } -} -#endif // DEBUG - -//-------------------------------------------------------------------- -// genConsumeReg: Do liveness update for a subnode that is being -// consumed by codegen. -// -// Arguments: -// tree - GenTree node -// -// Return Value: -// Returns the reg number of tree. -// In case of multi-reg call node returns the first reg number -// of the multi-reg return. -regNumber CodeGen::genConsumeReg(GenTree* tree) -{ - if (tree->OperGet() == GT_COPY) - { - genRegCopy(tree); - } - - // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it - // interferes with one of the other sources (or the target, if it's a "delayed use" register)). - // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and - // always using GT_COPY to make the lclVar location explicit. 
- // Note that we have to do this before calling genUpdateLife because otherwise if we spill it - // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds - // the lclVar (normally when a lclVar is spilled it is then used from its former register - // location, which matches the gtRegNum on the node). - // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded - // because if it's on the stack it will always get reloaded into tree->gtRegNum). - if (genIsRegCandidateLocal(tree)) - { - GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum) - { - inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum); - } - } - - genUnspillRegIfNeeded(tree); - - // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar - genUpdateLife(tree); - - assert(tree->gtHasReg()); - - // there are three cases where consuming a reg means clearing the bit in the live mask - // 1. it was not produced by a local - // 2. it was produced by a local that is going dead - // 3. it was produced by a local that does not live in that reg (like one allocated on the stack) - - if (genIsRegCandidateLocal(tree)) - { - GenTreeLclVarCommon* lcl = tree->AsLclVarCommon(); - LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; - assert(varDsc->lvLRACandidate); - - if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum)); - } - else if (varDsc->lvRegNum == REG_STK) - { - // We have loaded this into a register only temporarily - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - } - else - { - gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask()); - } - - genCheckConsumeNode(tree); - return tree->gtRegNum; -} - -// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). -void CodeGen::genConsumeAddress(GenTree* addr) -{ - if (!addr->isContained()) - { - genConsumeReg(addr); - } - else if (addr->OperGet() == GT_LEA) - { - genConsumeAddrMode(addr->AsAddrMode()); - } -} - -// do liveness update for a subnode that is being consumed by codegen -void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr) -{ - genConsumeOperands(addr); -} - -void CodeGen::genConsumeRegs(GenTree* tree) -{ -#if !defined(_TARGET_64BIT_) - if (tree->OperGet() == GT_LONG) - { - genConsumeRegs(tree->gtGetOp1()); - genConsumeRegs(tree->gtGetOp2()); - return; - } -#endif // !defined(_TARGET_64BIT_) - - if (tree->isContained()) - { - if (tree->isContainedSpillTemp()) - { - // spill temps are un-tracked and hence no need to update life - } - else if (tree->isIndir()) - { - genConsumeAddress(tree->AsIndir()->Addr()); - } - else if (tree->OperGet() == GT_AND) - { - // This is the special contained GT_AND that we created in Lowering::LowerCmp() - // Now we need to consume the operands of the GT_AND node. - genConsumeOperands(tree->AsOp()); - } - else if (tree->OperGet() == GT_LCL_VAR) - { - // A contained lcl var must be living on stack and marked as reg optional. - unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = compiler->lvaTable + varNum; - - noway_assert(varDsc->lvRegNum == REG_STK); - noway_assert(tree->IsRegOptional()); - - // Update the life of reg optional lcl var. 
- genUpdateLife(tree); - } - else - { - assert(tree->OperIsLeaf()); - } - } - else - { - genConsumeReg(tree); - } -} - -//------------------------------------------------------------------------ -// genConsumeOperands: Do liveness update for the operands of a unary or binary tree -// -// Arguments: -// tree - the GenTreeOp whose operands will have their liveness updated. -// -// Return Value: -// None. -// -// Notes: -// Note that this logic is localized here because we must do the liveness update in -// the correct execution order. This is important because we may have two operands -// that involve the same lclVar, and if one is marked "lastUse" we must handle it -// after the first. - -void CodeGen::genConsumeOperands(GenTreeOp* tree) -{ - GenTree* firstOp = tree->gtOp1; - GenTree* secondOp = tree->gtOp2; - if ((tree->gtFlags & GTF_REVERSE_OPS) != 0) - { - assert(secondOp != nullptr); - firstOp = secondOp; - secondOp = tree->gtOp1; - } - if (firstOp != nullptr) - { - genConsumeRegs(firstOp); - } - if (secondOp != nullptr) - { - genConsumeRegs(secondOp); - } -} - -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING -//------------------------------------------------------------------------ -// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node. -// Also loads in the right register the addresses of the -// src/dst for rep mov operation. -// -// Arguments: -// putArgNode - the PUTARG_STK tree. -// dstReg - the dstReg for the rep move operation. -// srcReg - the srcReg for the rep move operation. -// sizeReg - the sizeReg for the rep move operation. -// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack. -// -// Return Value: -// None. -// -// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg -// for copying on the stack a struct with references. -// The source address/offset is determined from the address on the GT_OBJ node, while -// the destination address is the address contained in 'baseVarNum' plus the offset -// provided in the 'putArgNode'. - -void CodeGen::genConsumePutStructArgStk( - GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum) -{ - assert(varTypeIsStruct(putArgNode)); - assert(baseVarNum != BAD_VAR_NUM); - - // The putArgNode children are always contained. We should not consume any registers. - assert(putArgNode->gtGetOp1()->isContained()); - - GenTree* dstAddr = putArgNode; - - // Get the source address. - GenTree* src = putArgNode->gtGetOp1(); - assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src)))); - GenTree* srcAddr = src->gtGetOp1(); - - size_t size = putArgNode->getArgSize(); - - assert(dstReg != REG_NA); - assert(srcReg != REG_NA); - - // Consume the registers only if they are not contained or set to REG_NA. - if (srcAddr->gtRegNum != REG_NA) - { - genConsumeReg(srcAddr); - } - - // If the op1 is already in the dstReg - nothing to do. - // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value. - if (dstAddr->gtRegNum != dstReg) - { - // Generate LEA instruction to load the stack of the outgoing var + SlotNum offset (or the incoming arg area - // for tail calls) in RDI. - // Destination is always local (on the stack) - use EA_PTRSIZE. - getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset()); - } - - if (srcAddr->gtRegNum != srcReg) - { - if (srcAddr->OperIsLocalAddr()) - { - // The OperLocalAddr is always contained. 
- assert(srcAddr->isContained()); - GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon(); - - // Generate LEA instruction to load the LclVar address in RSI. - // Source is known to be on the stack. Use EA_PTRSIZE. - unsigned int offset = 0; - if (srcAddr->OperGet() == GT_LCL_FLD_ADDR) - { - offset = srcAddr->AsLclFld()->gtLclOffs; - } - getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset); - } - else - { - assert(srcAddr->gtRegNum != REG_NA); - // Source is not known to be on the stack. Use EA_BYREF. - getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum); - } - } - - if (sizeReg != REG_NA) - { - inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE); - } -} -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - -//------------------------------------------------------------------------ -// genConsumeBlockSize: Ensure that the block size is in the given register -// -// Arguments: -// blkNode - The block node -// sizeReg - The register into which the block's size should go -// - -void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg) -{ - if (sizeReg != REG_NA) - { - unsigned blockSize = blkNode->Size(); - if (blockSize != 0) - { - assert(blkNode->gtRsvdRegs == genRegMask(sizeReg)); - genSetRegToIcon(sizeReg, blockSize); - } - else - { - noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); - genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize); - } - } -} - -//------------------------------------------------------------------------ -// genConsumeBlockDst: Ensure that the block destination address is in its -// allocated register. -// Arguments: -// blkNode - The block node -// - -void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode) -{ - GenTree* dstAddr = blkNode->Addr(); - genConsumeReg(dstAddr); -} - -//------------------------------------------------------------------------ -// genConsumeBlockSrc: Ensure that the block source address is in its -// allocated register if it is non-local. -// Arguments: -// blkNode - The block node -// -// Return Value: -// Returns the source address node, if it is non-local, -// and nullptr otherwise. - -GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode) -{ - GenTree* src = blkNode->Data(); - if (blkNode->OperIsCopyBlkOp()) - { - // For a CopyBlk we need the address of the source. - if (src->OperGet() == GT_IND) - { - src = src->gtOp.gtOp1; - } - else - { - // This must be a local. - // For this case, there is no source address register, as it is a - // stack-based address. - assert(src->OperIsLocal()); - return nullptr; - } - } - genConsumeReg(src); - return src; -} - -//------------------------------------------------------------------------ -// genConsumeBlockOp: Ensure that the block's operands are enregistered -// as needed. -// Arguments: -// blkNode - The block node -// -// Notes: -// This ensures that the operands are consumed in the proper order to -// obey liveness modeling. - -void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg) -{ - // We have to consume the registers, and perform any copies, in the actual execution order. - // The nominal order is: dst, src, size. However this may have been changed - // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic - // block size. - // Note that the register allocator ensures that the registers ON THE NODES will not interfere - // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order. 
- // Further, it ensures that they will not interfere with one another if they are then copied - // to the REQUIRED register (if a fixed register requirement) in execution order. This requires, - // then, that we first consume all the operands, then do any necessary moves. - - GenTree* dstAddr = blkNode->Addr(); - GenTree* src = nullptr; - unsigned blockSize = blkNode->Size(); - GenTree* size = nullptr; - bool evalSizeFirst = true; - - if (blkNode->OperGet() == GT_STORE_DYN_BLK) - { - evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst; - size = blkNode->AsDynBlk()->gtDynamicSize; - } - - // First, consusme all the sources in order - if (evalSizeFirst) - { - genConsumeBlockSize(blkNode, sizeReg); - } - if (blkNode->IsReverseOp()) - { - src = genConsumeBlockSrc(blkNode); - genConsumeBlockDst(blkNode); - } - else - { - genConsumeBlockDst(blkNode); - src = genConsumeBlockSrc(blkNode); - } - if (!evalSizeFirst) - { - genConsumeBlockSize(blkNode, sizeReg); - } - // Next, perform any necessary moves. - if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg)) - { - inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet()); - } - if (blkNode->IsReverseOp()) - { - if ((src != nullptr) && (src->gtRegNum != srcReg)) - { - inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet()); - } - if (dstAddr->gtRegNum != dstReg) - { - inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet()); - } - } - else - { - if (dstAddr->gtRegNum != dstReg) - { - inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet()); - } - if ((src != nullptr) && (src->gtRegNum != srcReg)) - { - inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet()); - } - } - if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg)) - { - inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet()); - } -} - -//------------------------------------------------------------------------- -// genProduceReg: do liveness update for register produced by the current -// node in codegen. -// -// Arguments: -// tree - Gentree node -// -// Return Value: -// None. -void CodeGen::genProduceReg(GenTree* tree) -{ - if (tree->gtFlags & GTF_SPILL) - { - // Code for GT_COPY node gets generated as part of consuming regs by its parent. - // A GT_COPY node in turn produces reg result and it should never be marked to - // spill. - // - // Similarly GT_RELOAD node gets generated as part of consuming regs by its - // parent and should never be marked for spilling. - noway_assert(!tree->IsCopyOrReload()); - - if (genIsRegCandidateLocal(tree)) - { - // Store local variable to its home location. - tree->gtFlags &= ~GTF_REG_VAL; - // Ensure that lclVar stores are typed correctly. - unsigned varNum = tree->gtLclVarCommon.gtLclNum; - assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || - (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet()))); - inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum); - } - else - { - // In case of multi-reg call node, spill flag on call node - // indicates that one or more of its allocated regs need to - // be spilled. Call node needs to be further queried to - // know which of its result regs needs to be spilled. 
- if (tree->IsMultiRegCall()) - { - GenTreeCall* call = tree->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - - for (unsigned i = 0; i < regCount; ++i) - { - unsigned flags = call->GetRegSpillFlagByIdx(i); - if ((flags & GTF_SPILL) != 0) - { - regNumber reg = call->GetRegNumByIdx(i); - call->SetInReg(); - regSet.rsSpillTree(reg, call, i); - gcInfo.gcMarkRegSetNpt(genRegMask(reg)); - } - } - } - else - { - tree->SetInReg(); - regSet.rsSpillTree(tree->gtRegNum, tree); - gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); - } - - tree->gtFlags |= GTF_SPILLED; - tree->gtFlags &= ~GTF_SPILL; - - return; - } - } - - genUpdateLife(tree); - - // If we've produced a register, mark it as a pointer, as needed. - if (tree->gtHasReg()) - { - // We only mark the register in the following cases: - // 1. It is not a register candidate local. In this case, we're producing a - // register from a local, but the local is not a register candidate. Thus, - // we must be loading it as a temp register, and any "last use" flag on - // the register wouldn't be relevant. - // 2. The register candidate local is going dead. There's no point to mark - // the register as live, with a GC pointer, if the variable is dead. - if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) - { - // Multi-reg call node will produce more than one register result. - // Mark all the regs produced by call node. - if (tree->IsMultiRegCall()) - { - GenTreeCall* call = tree->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - - for (unsigned i = 0; i < regCount; ++i) - { - regNumber reg = call->GetRegNumByIdx(i); - var_types type = retTypeDesc->GetReturnRegType(i); - gcInfo.gcMarkRegPtrVal(reg, type); - } - } - else if (tree->IsCopyOrReloadOfMultiRegCall()) - { - // we should never see reload of multi-reg call here - // because GT_RELOAD gets generated in reg consuming path. - noway_assert(tree->OperGet() == GT_COPY); - - // A multi-reg GT_COPY node produces those regs to which - // copy has taken place. 
- GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); - GenTreeCall* call = copy->gtGetOp1()->AsCall(); - ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc(); - unsigned regCount = retTypeDesc->GetReturnRegCount(); - - for (unsigned i = 0; i < regCount; ++i) - { - var_types type = retTypeDesc->GetReturnRegType(i); - regNumber fromReg = call->GetRegNumByIdx(i); - regNumber toReg = copy->GetRegNumByIdx(i); - - if (toReg != REG_NA) - { - gcInfo.gcMarkRegPtrVal(toReg, type); - } - } - } - else - { - gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); - } - } - } - tree->SetInReg(); -} - -// transfer gc/byref status of src reg to dst reg -void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) -{ - regMaskTP srcMask = genRegMask(src); - regMaskTP dstMask = genRegMask(dst); - - if (gcInfo.gcRegGCrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetGCref(dstMask); - } - else if (gcInfo.gcRegByrefSetCur & srcMask) - { - gcInfo.gcMarkRegSetByref(dstMask); - } - else - { - gcInfo.gcMarkRegSetNpt(dstMask); - } -} - -// generates an ip-relative call or indirect call via reg ('call reg') -// pass in 'addr' for a relative call or 'base' for a indirect register call -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -void CodeGen::genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize), - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - IL_OFFSETX ilOffset, - regNumber base, - bool isJump, - bool isNoGC) -{ -#if !defined(_TARGET_X86_) - ssize_t argSize = 0; -#endif // !defined(_TARGET_X86_) - getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize, - retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur, - gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump, - emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd))); -} - -// generates an indirect call via addressing mode (call []) given an indir node -// methHnd - optional, only used for pretty printing -// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) -void CodeGen::genEmitCall(int callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize), - emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize), - IL_OFFSETX ilOffset) -{ -#if !defined(_TARGET_X86_) - ssize_t argSize = 0; -#endif // !defined(_TARGET_X86_) - genConsumeAddress(indir->Addr()); - - getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, - argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), - gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, - indir->Base() ? indir->Base()->gtRegNum : REG_NA, - indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset()); -} - //------------------------------------------------------------------------ // genStoreInd: Generate code for a GT_STOREIND node. 
// @@ -5724,16 +4525,10 @@ void CodeGen::genStoreInd(GenTreePtr node) noway_assert(data->gtRegNum != REG_ARG_0); // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } + genCopyRegIfNeeded(addr, REG_ARG_0); // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } + genCopyRegIfNeeded(data, REG_ARG_1); genGCWriteBarrier(storeInd, writeBarrierForm); } @@ -5821,6 +4616,23 @@ void CodeGen::genStoreInd(GenTreePtr node) assert(rmwSrc == data->gtGetOp2()); genCodeForShiftRMW(storeInd); } + else if (!compiler->opts.compDbgCode && data->OperGet() == GT_ADD && + (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1))) + { + // Generate "inc/dec [mem]" instead of "add/sub [mem], 1". + // + // Notes: + // 1) Global morph transforms GT_SUB(x, +/-1) into GT_ADD(x, -/+1). + // 2) TODO-AMD64: Debugger routine NativeWalker::Decode() runs into + // an assert while decoding ModR/M byte of "inc dword ptr [rax]". + // It is not clear whether Decode() can handle all possible + // addr modes with inc/dec. For this reason, inc/dec [mem] + // is not generated while generating debuggable code. Update + // the above if condition once Decode() routine is fixed. + assert(rmwSrc->isContainedIntOrIImmed()); + instruction ins = rmwSrc->IsIntegralConst(1) ? INS_inc : INS_dec; + getEmitter()->emitInsRMW(ins, emitTypeSize(storeInd), storeInd); + } else { // generate code for remaining binary RMW memory ops like add/sub/and/or/xor @@ -5905,10 +4717,7 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri // call write_barrier_helper_reg // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already by in this register? - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet()); - } + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER); unsigned tgtAnywhere = 0; if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked) @@ -5943,10 +4752,28 @@ void CodeGen::genCallInstruction(GenTreePtr node) // all virtuals should have been expanded into a control expression assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); + // Insert a GS check if necessary + if (call->IsTailCallViaHelper()) + { + if (compiler->getNeedsGSSecurityCookie()) + { +#if FEATURE_FIXED_OUT_ARGS + // If either of the conditions below is true, we will need a temporary register in order to perform the GS + // cookie check. When FEATURE_FIXED_OUT_ARGS is disabled, we save and restore the temporary register using + // push/pop. When FEATURE_FIXED_OUT_ARGS is enabled, however, we need an alternative solution. For now, + // though, the tail prefix is ignored on all platforms that use fixed out args, so we should never hit this + // case. + assert(compiler->gsGlobalSecurityCookieAddr == nullptr); + assert((int)compiler->gsGlobalSecurityCookieVal == (ssize_t)compiler->gsGlobalSecurityCookieVal); +#endif + genEmitGSCookieCheck(true); + } + } + // Consume all the arg regs for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); @@ -5960,13 +4787,13 @@ void CodeGen::genCallInstruction(GenTreePtr node) #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING // Deal with multi register passed struct args. 
- if (argNode->OperGet() == GT_LIST) + if (argNode->OperGet() == GT_FIELD_LIST) { - GenTreeArgList* argListPtr = argNode->AsArgList(); - unsigned iterationNum = 0; - for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++) + GenTreeFieldList* fieldListPtr = argNode->AsFieldList(); + unsigned iterationNum = 0; + for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), iterationNum++) { - GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + GenTreePtr putArgRegNode = fieldListPtr->gtOp.gtOp1; assert(putArgRegNode->gtOper == GT_PUTARG_REG); regNumber argReg = REG_NA; @@ -6036,20 +4863,34 @@ void CodeGen::genCallInstruction(GenTreePtr node) { assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); } + if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST)) + { + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert(curArgTabEntry); + stackArgBytes += curArgTabEntry->numSlots * TARGET_POINTER_SIZE; + } + else #endif // defined(_TARGET_X86_) -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - if (genActualType(arg->TypeGet()) == TYP_STRUCT) +#ifdef FEATURE_PUT_STRUCT_ARG_STK + if (genActualType(arg->TypeGet()) == TYP_STRUCT) { assert(arg->OperGet() == GT_PUTARG_STK); - GenTreeObj* obj = arg->gtGetOp1()->AsObj(); - stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass); + GenTreeObj* obj = arg->gtGetOp1()->AsObj(); + unsigned argBytes = (unsigned)roundUp(obj->gtBlkSize, TARGET_POINTER_SIZE); +#ifdef DEBUG + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); + assert((curArgTabEntry->numSlots * TARGET_POINTER_SIZE) == argBytes); +#endif // DEBUG + stackArgBytes += argBytes; } else -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + { +#endif // FEATURE_PUT_STRUCT_ARG_STK stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); + } } args = args->gtOp.gtOp2; } @@ -6098,10 +4939,7 @@ void CodeGen::genCallInstruction(GenTreePtr node) assert(target != nullptr); genConsumeReg(target); - if (target->gtRegNum != REG_RAX) - { - inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum); - } + genCopyRegIfNeeded(target, REG_RAX); return; } @@ -6141,7 +4979,6 @@ void CodeGen::genCallInstruction(GenTreePtr node) bool fPossibleSyncHelperCall = false; CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; -#ifdef DEBUGGING_SUPPORT // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, @@ -6150,7 +4987,6 @@ void CodeGen::genCallInstruction(GenTreePtr node) { (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); } -#endif // DEBUGGING_SUPPORT #if defined(_TARGET_X86_) // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will @@ -6167,7 +5003,38 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (target != nullptr) { - if (target->isContainedIndir()) +#ifdef _TARGET_X86_ + if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. 
+ + assert(REG_VIRTUAL_STUB_PARAM == REG_VIRTUAL_STUB_TARGET); + + assert(target->isContainedIndir()); + assert(target->OperGet() == GT_IND); + + GenTree* addr = target->AsIndir()->Addr(); + assert(!addr->isContained()); + + genConsumeReg(addr); + genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET); + + getEmitter()->emitIns_Nop(3); + getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), methHnd, + INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSizeForEmitter, + retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), + gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, + ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0); + } + else +#endif + if (target->isContainedIndir()) { if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) { @@ -6977,8 +5844,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) genConsumeOperands(tree); - assert(targetReg != REG_NA); - GenTreePtr loOp1 = op1->gtGetOp1(); GenTreePtr hiOp1 = op1->gtGetOp2(); GenTreePtr loOp2 = op2->gtGetOp1(); @@ -6992,6 +5857,12 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) // Emit the compare instruction getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2); + // If the result is not being materialized in a register, we're done. + if (targetReg == REG_NA) + { + return; + } + // Generate the first jump for the high compare CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; @@ -7015,10 +5886,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); inst_SET(jumpKindLo, targetReg); - // Set the higher bytes to 0 - inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); - genProduceReg(tree); - inst_JMP(EJ_jmp, labelFinal); // Define the label for hi jump target here. If we have jumped here, we want to set @@ -7027,11 +5894,10 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) genDefineTempLabel(labelHi); inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg); + genDefineTempLabel(labelFinal); // Set the higher bytes to 0 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); genProduceReg(tree); - - genDefineTempLabel(labelFinal); } else { @@ -7062,152 +5928,6 @@ void CodeGen::genCompareLong(GenTreePtr treeNode) genProduceReg(tree); } } - -//------------------------------------------------------------------------ -// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result -// is not manifested in a register. -// -// Arguments: -// treeNode - the compare tree -// -// Return Value: -// None. -// Comments: -// For long compares, we need to compare the high parts of operands first, then the low parts. -// We only have to do the low compare if the high parts of the operands are equal. 
-// -// In the case where the result of a rel-op is not realized in a register, we generate: -// -// Opcode x86 equivalent Comment -// ------ -------------- ------- -// -// GT_LT; unsigned cmp hiOp1,hiOp2 -// jb trueLabel -// ja falseLabel -// cmp loOp1,loOp2 -// jb trueLabel -// falseLabel: -// -// GT_LE; unsigned cmp hiOp1,hiOp2 -// jb trueLabel -// ja falseLabel -// cmp loOp1,loOp2 -// jbe trueLabel -// falseLabel: -// -// GT_GT; unsigned cmp hiOp1,hiOp2 -// ja trueLabel -// jb falseLabel -// cmp loOp1,loOp2 -// ja trueLabel -// falseLabel: -// -// GT_GE; unsigned cmp hiOp1,hiOp2 -// ja trueLabel -// jb falseLabel -// cmp loOp1,loOp2 -// jae trueLabel -// falseLabel: -// -// GT_LT; signed cmp hiOp1,hiOp2 -// jl trueLabel -// jg falseLabel -// cmp loOp1,loOp2 -// jb trueLabel -// falseLabel: -// -// GT_LE; signed cmp hiOp1,hiOp2 -// jl trueLabel -// jg falseLabel -// cmp loOp1,loOp2 -// jbe trueLabel -// falseLabel: -// -// GT_GT; signed cmp hiOp1,hiOp2 -// jg trueLabel -// jl falseLabel -// cmp loOp1,loOp2 -// ja trueLabel -// falseLabel: -// -// GT_GE; signed cmp hiOp1,hiOp2 -// jg trueLabel -// jl falseLabel -// cmp loOp1,loOp2 -// jae trueLabel -// falseLabel: -// -// GT_EQ; cmp hiOp1,hiOp2 -// jne falseLabel -// cmp loOp1,loOp2 -// je trueLabel -// falseLabel: -// -// GT_NE; cmp hiOp1,hiOp2 -// jne labelTrue -// cmp loOp1,loOp2 -// jne trueLabel -// falseLabel: -// -// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test. -void CodeGen::genJTrueLong(GenTreePtr treeNode) -{ - assert(treeNode->OperIsCompare()); - - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; - - assert(varTypeIsLong(op1->TypeGet())); - assert(varTypeIsLong(op2->TypeGet())); - - regNumber targetReg = treeNode->gtRegNum; - - assert(targetReg == REG_NA); - - GenTreePtr loOp1 = op1->gtGetOp1(); - GenTreePtr hiOp1 = op1->gtGetOp2(); - GenTreePtr loOp2 = op2->gtGetOp1(); - GenTreePtr hiOp2 = op2->gtGetOp2(); - - // Emit the compare instruction - getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2); - - // Generate the first jump for the high compare - CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED; - - // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest. - BasicBlock* falseLabel = genCreateTempLabel(); - - emitJumpKind jumpKindHi[2]; - - // Generate the jumps for the high compare - genJumpKindsForTreeLongHi(tree, jumpKindHi); - - BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest; - - if (jumpKindHi[0] != EJ_NONE) - { - inst_JMP(jumpKindHi[0], trueLabel); - } - - if (jumpKindHi[1] != EJ_NONE) - { - inst_JMP(jumpKindHi[1], falseLabel); - } - - // The low jump must be unsigned - emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED); - - // Emit the comparison and the jump to the trueLabel - getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2); - - inst_JMP(jumpKindLo, trueLabel); - - // Generate falseLabel, which is the false path. We will jump here if the high compare is false - // or fall through if the low compare is false. - genDefineTempLabel(falseLabel); -} #endif //! 
defined(_TARGET_64BIT_) //------------------------------------------------------------------------ @@ -7339,19 +6059,77 @@ void CodeGen::genCompareInt(GenTreePtr treeNode) { assert(treeNode->OperIsCompare()); - GenTreeOp* tree = treeNode->AsOp(); - GenTreePtr op1 = tree->gtOp1; - GenTreePtr op2 = tree->gtOp2; - var_types op1Type = op1->TypeGet(); - var_types op2Type = op2->TypeGet(); + GenTreeOp* tree = treeNode->AsOp(); + GenTreePtr op1 = tree->gtOp1; + GenTreePtr op2 = tree->gtOp2; + var_types op1Type = op1->TypeGet(); + var_types op2Type = op2->TypeGet(); + regNumber targetReg = treeNode->gtRegNum; + + // Case of op1 == 0 or op1 != 0: + // Optimize generation of 'test' instruction if op1 sets flags. + // + // Note that if LSRA has inserted any GT_RELOAD/GT_COPY before + // op1, it will not modify the flags set by codegen of op1. + // Similarly op1 could also be reg-optional at its use and + // it was spilled after producing its result in a register. + // Spill code too will not modify the flags set by op1. + GenTree* realOp1 = op1->gtSkipReloadOrCopy(); + if (realOp1->gtSetFlags()) + { + // op1 must set ZF and SF flags + assert(realOp1->gtSetZSFlags()); + + // Must be (in)equality against zero. + assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE); + assert(op2->IsIntegralConst(0)); + assert(op2->isContained()); + + // Just consume the operands + genConsumeOperands(tree); + + // No need to generate test instruction since + // op1 sets flags + + // Are we evaluating this into a register? + if (targetReg != REG_NA) + { + genSetRegToCond(targetReg, tree); + genProduceReg(tree); + } + + return; + } + +#ifdef FEATURE_SIMD + // If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)), + // then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic + // would set or clear Zero flag. + if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE)) + { + // Is it a SIMD (in)Equality that doesn't need to materialize result into a register? + if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality()) + { + // Must be comparing against true or false. + assert(op2->IsIntegralConst(0) || op2->IsIntegralConst(1)); + assert(op2->isContainedIntOrIImmed()); + + // In this case SIMD (in)Equality will set or clear + // Zero flag, based on which GT_JTRUE would generate + // the right conditional jump. + return; + } + } +#endif // FEATURE_SIMD genConsumeOperands(tree); instruction ins; emitAttr cmpAttr; - regNumber targetReg = treeNode->gtRegNum; - assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm + // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm. + // https://github.com/dotnet/coreclr/issues/7270 + assert(!op1->isContainedIntOrIImmed()); // We no longer support assert(!varTypeIsFloating(op2Type)); #ifdef _TARGET_X86_ @@ -7387,7 +6165,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode) { // Do we have a short compare against a constant in op2? // - // We checked for this case in LowerCmp() and if we can perform a small + // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small // compare immediate we labeled this compare with a GTF_RELOP_SMALL // and for unsigned small non-equality compares the GTF_UNSIGNED flag. 
// @@ -7442,12 +6220,11 @@ void CodeGen::genCompareInt(GenTreePtr treeNode) if (op1->isContained()) { // op1 can be a contained memory op - // or the special contained GT_AND that we created in Lowering::LowerCmp() + // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp() // - if ((op1->OperGet() == GT_AND)) + if ((op1->OperGet() == GT_AND) && op1->gtGetOp2()->isContainedIntOrIImmed() && + ((tree->OperGet() == GT_EQ) || (tree->OperGet() == GT_NE))) { - noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed()); - ins = INS_test; // we will generate "test andOp1, andOp2CnsVal" op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1 op1 = op1->gtOp.gtOp1; // overwrite op1 @@ -7561,6 +6338,93 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree) } } +#if !defined(_TARGET_64BIT_) +//------------------------------------------------------------------------ +// genIntToIntCast: Generate code for long to int casts on x86. +// +// Arguments: +// cast - The GT_CAST node +// +// Return Value: +// None. +// +// Assumptions: +// The cast node and its sources (via GT_LONG) must have been assigned registers. +// The destination cannot be a floating point type or a small integer type. +// +void CodeGen::genLongToIntCast(GenTree* cast) +{ + assert(cast->OperGet() == GT_CAST); + + GenTree* src = cast->gtGetOp1(); + noway_assert(src->OperGet() == GT_LONG); + + genConsumeRegs(src); + + var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG; + var_types dstType = cast->CastToType(); + regNumber loSrcReg = src->gtGetOp1()->gtRegNum; + regNumber hiSrcReg = src->gtGetOp2()->gtRegNum; + regNumber dstReg = cast->gtRegNum; + + assert((dstType == TYP_INT) || (dstType == TYP_UINT)); + assert(genIsValidIntReg(loSrcReg)); + assert(genIsValidIntReg(hiSrcReg)); + assert(genIsValidIntReg(dstReg)); + + if (cast->gtOverflow()) + { + // + // Generate an overflow check for [u]long to [u]int casts: + // + // long -> int - check if the upper 33 bits are all 0 or all 1 + // + // ulong -> int - check if the upper 33 bits are all 0 + // + // long -> uint - check if the upper 32 bits are all 0 + // ulong -> uint - check if the upper 32 bits are all 0 + // + + if ((srcType == TYP_LONG) && (dstType == TYP_INT)) + { + BasicBlock* allOne = genCreateTempLabel(); + BasicBlock* success = genCreateTempLabel(); + + inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); + inst_JMP(EJ_js, allOne); + + inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + inst_JMP(EJ_jmp, success); + + genDefineTempLabel(allOne); + inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + + genDefineTempLabel(success); + } + else + { + if ((srcType == TYP_ULONG) && (dstType == TYP_INT)) + { + inst_RV_RV(INS_test, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_js, SCK_OVERFLOW); + } + + inst_RV_RV(INS_test, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE); + genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); + } + } + + if (dstReg != loSrcReg) + { + inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE); + } + + genProduceReg(cast); +} +#endif + //------------------------------------------------------------------------ // genIntToIntCast: Generate code for an integer cast // This method handles integer overflow checking casts @@ -7584,13 +6448,22 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CAST); - GenTreePtr castOp = treeNode->gtCast.CastOp(); 
- regNumber targetReg = treeNode->gtRegNum; - regNumber sourceReg = castOp->gtRegNum; - var_types dstType = treeNode->CastToType(); - bool isUnsignedDst = varTypeIsUnsigned(dstType); - var_types srcType = genActualType(castOp->TypeGet()); - bool isUnsignedSrc = varTypeIsUnsigned(srcType); + GenTreePtr castOp = treeNode->gtCast.CastOp(); + var_types srcType = genActualType(castOp->TypeGet()); + +#if !defined(_TARGET_64BIT_) + if (varTypeIsLong(srcType)) + { + genLongToIntCast(treeNode); + return; + } +#endif // !defined(_TARGET_64BIT_) + + regNumber targetReg = treeNode->gtRegNum; + regNumber sourceReg = castOp->gtRegNum; + var_types dstType = treeNode->CastToType(); + bool isUnsignedDst = varTypeIsUnsigned(dstType); + bool isUnsignedSrc = varTypeIsUnsigned(srcType); // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0) @@ -7948,7 +6821,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); genConsumeOperands(treeNode->AsOp()); - if (srcType == dstType && targetReg == op1->gtRegNum) + if (srcType == dstType && (!op1->isContained() && (targetReg == op1->gtRegNum))) { // source and destinations types are the same and also reside in the same register. // we just need to consume and produce the reg in this case. @@ -7999,7 +6872,8 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode) assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); #if !defined(_TARGET_64BIT_) - NYI_IF(varTypeIsLong(srcType), "Conversion from long to float"); + // We expect morph to replace long to float/double casts with helper calls + noway_assert(!varTypeIsLong(srcType)); #endif // !defined(_TARGET_64BIT_) // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we @@ -8225,27 +7099,27 @@ void CodeGen::genCkfinite(GenTreePtr treeNode) // // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum): // movaps targetReg, op1->gtRegNum - // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY - // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y // and tmpReg, <mask> // cmp tmpReg, <mask> // je <throw block> // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it // // For TYP_DOUBLE with (targetReg == op1->gtRegNum): - // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY - // mov_xmm2i tmpReg, targetReg // tmpReg <= Y + // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY + // mov_xmm2i tmpReg, targetReg // tmpReg <= Y // and tmpReg, <mask> // cmp tmpReg, <mask> // je <throw block> - // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX + // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX // // For TYP_FLOAT, it's the same as _TARGET_64BIT_: - // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits + // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits // and tmpReg, <mask> // cmp tmpReg, <mask> // je <throw block> - // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum + // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp. 
@@ -8613,7 +7487,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) #if FEATURE_FIXED_OUT_ARGS baseVarNum = compiler->lvaOutgoingArgSpaceVar; #else // !FEATURE_FIXED_OUT_ARGS - NYI_X86("Stack args for x86/RyuJIT"); + assert(!"No BaseVarForPutArgStk on x86"); baseVarNum = BAD_VAR_NUM; #endif // !FEATURE_FIXED_OUT_ARGS } @@ -8621,8 +7495,74 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) return baseVarNum; } -//--------------------------------------------------------------------- // -// genPutStructArgStk - generate code for passing an arg on the stack. +#ifdef _TARGET_X86_ +//--------------------------------------------------------------------- +// adjustStackForPutArgStk: +// adjust the stack pointer for a putArgStk node if necessary. +// +// Arguments: +// putArgStk - the putArgStk node. +// +// Returns: true if the stack pointer was adjusted; false otherwise. +// +bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) +{ +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(putArgStk)) + { + const unsigned argSize = genTypeSize(putArgStk); + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + genStackLevel += argSize; + m_pushStkArg = false; + return true; + } +#endif // FEATURE_SIMD + + const unsigned argSize = putArgStk->getArgSize(); + + // If the gtPutArgStkKind is one of the push types, we do not pre-adjust the stack. + // This is set in Lowering, and is true if and only if: + // - This argument contains any GC pointers OR + // - It is a GT_FIELD_LIST OR + // - It is less than 16 bytes in size. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + switch (putArgStk->gtPutArgStkKind) + { + case GenTreePutArgStk::Kind::RepInstr: + case GenTreePutArgStk::Kind::Unroll: + assert((putArgStk->gtNumberReferenceSlots == 0) && (putArgStk->gtGetOp1()->OperGet() != GT_FIELD_LIST) && + (argSize >= 16)); + break; + case GenTreePutArgStk::Kind::Push: + case GenTreePutArgStk::Kind::PushAllSlots: + assert((putArgStk->gtNumberReferenceSlots != 0) || (putArgStk->gtGetOp1()->OperGet() == GT_FIELD_LIST) || + (argSize < 16)); + break; + case GenTreePutArgStk::Kind::Invalid: + default: + assert(!"Uninitialized GenTreePutArgStk::Kind"); + break; + } +#endif // DEBUG + + if (putArgStk->isPushKind()) + { + m_pushStkArg = true; + return false; + } + else + { + m_pushStkArg = false; + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + genStackLevel += argSize; + return true; + } +} + +//--------------------------------------------------------------------- +// genPutArgStkFieldList - generate code for passing an arg on the stack. // // Arguments // treeNode - the GT_PUTARG_STK node @@ -8631,25 +7571,224 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) // Return value: // None // -void CodeGen::genPutArgStk(GenTreePtr treeNode) +void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) { - var_types targetType = treeNode->TypeGet(); + GenTreeFieldList* const fieldList = putArgStk->gtOp1->AsFieldList(); + assert(fieldList != nullptr); + + // Set m_pushStkArg and pre-adjust the stack if necessary. + const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk); + // For now, we only support the "push" case; we will push a full slot for the first field of each slot + // within the struct. + assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg); + + // If we have pre-adjusted the stack and are simply storing the fields in order) set the offset to 0. + // (Note that this mode is not currently being used.) 
+ // If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them + // in reverse order, so we start with the current field offset at the size of the struct arg (which must be + // a multiple of the target pointer size). + unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize(); + unsigned prevFieldOffset = currentOffset; + regNumber tmpReg = REG_NA; + if (putArgStk->gtRsvdRegs != RBM_NONE) + { + assert(genCountBits(putArgStk->gtRsvdRegs) == 1); + tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs); + assert(genIsValidIntReg(tmpReg)); + } + for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest()) + { + GenTree* const fieldNode = current->Current(); + const unsigned fieldOffset = current->gtFieldOffset; + var_types fieldType = current->gtFieldType; + + // Long-typed nodes should have been handled by the decomposition pass, and lowering should have sorted the + // field list in descending order by offset. + assert(!varTypeIsLong(fieldType)); + assert(fieldOffset <= prevFieldOffset); + + // Consume the register, if any, for this field. Note that genConsumeRegs() will appropriately + // update the liveness info for a lclVar that has been marked RegOptional, which hasn't been + // assigned a register, and which is therefore contained. + // Unlike genConsumeReg(), it handles the case where no registers are being consumed. + genConsumeRegs(fieldNode); + regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum; + + // If the field is slot-like, we can use a push instruction to store the entire register no matter the type. + // + // The GC encoder requires that the stack remain 4-byte aligned at all times. Round the adjustment up + // to the next multiple of 4. If we are going to generate a `push` instruction, the adjustment must + // not require rounding. + // NOTE: if the field is of GC type, we must use a push instruction, since the emitter is not otherwise + // able to detect stores into the outgoing argument area of the stack on x86. + const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4); + int adjustment = roundUp(currentOffset - fieldOffset, 4); + if (fieldIsSlot) + { + fieldType = genActualType(fieldType); + unsigned pushSize = genTypeSize(fieldType); + assert((pushSize % 4) == 0); + adjustment -= pushSize; + while (adjustment != 0) + { + inst_IV(INS_push, 0); + currentOffset -= pushSize; + genStackLevel += pushSize; + adjustment -= pushSize; + } + m_pushStkArg = true; + } + else + { + m_pushStkArg = false; + // We always "push" floating point fields (i.e. they are full slot values that don't + // require special handling). + assert(varTypeIsIntegralOrI(fieldNode)); + // If we can't push this field, it needs to be in a register so that we can store + // it to the stack location. + assert(tmpReg != REG_NA); + if (adjustment != 0) + { + // This moves the stack pointer to fieldOffset. + // For this case, we must adjust the stack and generate stack-relative stores rather than pushes. + // Adjust the stack pointer to the next slot boundary. + inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE); + currentOffset -= adjustment; + genStackLevel += adjustment; + } + + // Does it need to be in a byte register? + // If so, we'll use tmpReg, which must have been allocated as a byte register. + // If it's already in a register, but not a byteable one, then move it. 
+ if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0))) + { + noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0); + if (argReg != REG_NA) + { + inst_RV_RV(INS_mov, tmpReg, argReg, fieldType); + argReg = tmpReg; + } + } + } + + if (argReg == REG_NA) + { + if (m_pushStkArg) + { + if (fieldNode->isContainedSpillTemp()) + { + assert(fieldNode->IsRegOptional()); + TempDsc* tmp = getSpillTempDsc(fieldNode); + getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0); + compiler->tmpRlsTemp(tmp); + } + else + { + assert(varTypeIsIntegralOrI(fieldNode)); + switch (fieldNode->OperGet()) + { + case GT_LCL_VAR: + inst_TT(INS_push, fieldNode, 0, 0, emitActualTypeSize(fieldNode->TypeGet())); + break; + case GT_CNS_INT: + if (fieldNode->IsIconHandle()) + { + inst_IV_handle(INS_push, fieldNode->gtIntCon.gtIconVal); + } + else + { + inst_IV(INS_push, fieldNode->gtIntCon.gtIconVal); + } + break; + default: + unreached(); + } + } + currentOffset -= TARGET_POINTER_SIZE; + genStackLevel += TARGET_POINTER_SIZE; + } + else + { + // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack. + assert(varTypeIsIntegralOrI(fieldNode)); + switch (fieldNode->OperGet()) + { + case GT_LCL_VAR: + inst_RV_TT(INS_mov, tmpReg, fieldNode); + break; + case GT_CNS_INT: + genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode); + break; + default: + unreached(); + } + genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset); + } + } + else + { + genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset); + if (m_pushStkArg) + { + // We always push a slot-rounded size + currentOffset -= genTypeSize(fieldType); + } + } + + prevFieldOffset = fieldOffset; + } + if (currentOffset != 0) + { + // We don't expect padding at the beginning of a struct, but it could happen with explicit layout. + inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE); + genStackLevel += currentOffset; + } +} +#endif // _TARGET_X86_ + +//--------------------------------------------------------------------- +// genPutArgStk - generate code for passing an arg on the stack. +// +// Arguments +// treeNode - the GT_PUTARG_STK node +// targetType - the type of the treeNode +// +// Return value: +// None +// +void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) +{ + var_types targetType = putArgStk->TypeGet(); + #ifdef _TARGET_X86_ - noway_assert(targetType != TYP_STRUCT); + +#ifdef FEATURE_SIMD + if (targetType == TYP_SIMD12) + { + genPutArgStkSIMD12(putArgStk); + return; + } +#endif // FEATURE_SIMD + + if (varTypeIsStruct(targetType)) + { + (void)genAdjustStackForPutArgStk(putArgStk); + genPutStructArgStk(putArgStk); + return; + } // The following logic is applicable for x86 arch. - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet())); - GenTreePtr data = treeNode->gtOp.gtOp1; + GenTreePtr data = putArgStk->gtOp1; // On a 32-bit target, all of the long arguments have been decomposed into // a separate putarg_stk for each of the upper and lower halves. noway_assert(targetType != TYP_LONG); - int argSize = genTypeSize(genActualType(targetType)); - genStackLevel += argSize; + const unsigned argSize = putArgStk->getArgSize(); + assert((argSize % TARGET_POINTER_SIZE) == 0); - // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp? 
if (data->isContainedIntOrIImmed()) { if (data->IsIconHandle()) @@ -8660,53 +7799,50 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) { inst_IV(INS_push, data->gtIntCon.gtIconVal); } + genStackLevel += argSize; } - else if (data->isContained()) + else if (data->OperGet() == GT_FIELD_LIST) { - NYI_X86("Contained putarg_stk of non-constant"); + genPutArgStkFieldList(putArgStk); } else { + // We should not see any contained nodes that are not immediates. + assert(!data->isContained()); genConsumeReg(data); - if (varTypeIsIntegralOrI(targetType)) - { - inst_RV(INS_push, data->gtRegNum, targetType); - } - else - { - // Decrement SP. - inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL)); - getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0); - } + genPushReg(targetType, data->gtRegNum); } #else // !_TARGET_X86_ { - unsigned baseVarNum = getBaseVarForPutArgStk(treeNode); + unsigned baseVarNum = getBaseVarForPutArgStk(putArgStk); #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (varTypeIsStruct(targetType)) { - genPutStructArgStk(treeNode, baseVarNum); + m_stkArgVarNum = baseVarNum; + m_stkArgOffset = putArgStk->getArgOffset(); + genPutStructArgStk(putArgStk); + m_stkArgVarNum = BAD_VAR_NUM; return; } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING noway_assert(targetType != TYP_STRUCT); - assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); + assert(!varTypeIsFloating(targetType) || (targetType == putArgStk->gtOp1->TypeGet())); // Get argument offset on stack. // Here we cross check that argument offset hasn't changed from lowering to codegen since // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. - int argOffset = treeNode->AsPutArgStk()->getArgOffset(); + int argOffset = putArgStk->getArgOffset(); #ifdef DEBUG - fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); + fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(putArgStk->gtCall, putArgStk); assert(curArgTabEntry); assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); #endif - GenTreePtr data = treeNode->gtGetOp1(); + GenTreePtr data = putArgStk->gtOp1; if (data->isContained()) { @@ -8723,7 +7859,125 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) #endif // !_TARGET_X86_ } -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#ifdef _TARGET_X86_ +// genPushReg: Push a register value onto the stack and adjust the stack level +// +// Arguments: +// type - the type of value to be stored +// reg - the register containing the value +// +// Notes: +// For TYP_LONG, the srcReg must be a floating point register. +// Otherwise, the register type must be consistent with the given type. +// +void CodeGen::genPushReg(var_types type, regNumber srcReg) +{ + unsigned size = genTypeSize(type); + if (varTypeIsIntegralOrI(type) && type != TYP_LONG) + { + assert(genIsValidIntReg(srcReg)); + inst_RV(INS_push, srcReg, type); + } + else + { + instruction ins; + emitAttr attr = emitTypeSize(type); + if (type == TYP_LONG) + { + // On x86, the only way we can push a TYP_LONG from a register is if it is in an xmm reg. + // This is only used when we are pushing a struct from memory to memory, and basically is + // handling an 8-byte "chunk", as opposed to strictly a long type. 
+ ins = INS_movq; + } + else + { + ins = ins_Store(type); + } + assert(genIsValidFloatReg(srcReg)); + inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE); + getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0); + } + genStackLevel += size; +} +#endif // _TARGET_X86_ + +#if defined(FEATURE_PUT_STRUCT_ARG_STK) +// genStoreRegToStackArg: Store a register value into the stack argument area +// +// Arguments: +// type - the type of value to be stored +// reg - the register containing the value +// offset - the offset from the base (see Assumptions below) +// +// Notes: +// A type of TYP_STRUCT instructs this method to store a 16-byte chunk +// at the given offset (i.e. not the full struct). +// +// Assumptions: +// The caller must set the context appropriately before calling this method: +// - On x64, m_stkArgVarNum must be set according to whether this is a regular or tail call. +// - On x86, the caller must set m_pushStkArg if this method should push the argument. +// Otherwise, the argument is stored at the given offset from sp. +// +// TODO: In the below code the load and store instructions are for 16 bytes, but the +// type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but +// this probably needs to be changed. +// +void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset) +{ + assert(srcReg != REG_NA); + instruction ins; + emitAttr attr; + unsigned size; + + if (type == TYP_STRUCT) + { + ins = INS_movdqu; + // This should be changed! + attr = EA_8BYTE; + size = 16; + } + else + { +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(type)) + { + assert(genIsValidFloatReg(srcReg)); + ins = ins_Store(type); // TODO-CQ: pass 'aligned' correctly + } + else +#endif // FEATURE_SIMD +#ifdef _TARGET_X86_ + if (type == TYP_LONG) + { + assert(genIsValidFloatReg(srcReg)); + ins = INS_movq; + } + else +#endif // _TARGET_X86_ + { + assert((varTypeIsFloating(type) && genIsValidFloatReg(srcReg)) || + (varTypeIsIntegralOrI(type) && genIsValidIntReg(srcReg))); + ins = ins_Store(type); + } + attr = emitTypeSize(type); + size = genTypeSize(type); + } + +#ifdef _TARGET_X86_ + if (m_pushStkArg) + { + genPushReg(type, srcReg); + } + else + { + getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, offset); + } +#else // !_TARGET_X86_ + assert(m_stkArgVarNum != BAD_VAR_NUM); + getEmitter()->emitIns_S_R(ins, attr, srcReg, m_stkArgVarNum, m_stkArgOffset + offset); +#endif // !_TARGET_X86_ +} //--------------------------------------------------------------------- // genPutStructArgStk - generate code for copying a struct arg on the stack by value. @@ -8731,42 +7985,39 @@ void CodeGen::genPutArgStk(GenTreePtr treeNode) // it generates the gcinfo as well. // // Arguments -// treeNode - the GT_PUTARG_STK node -// baseVarNum - the variable number relative to which to put the argument on the stack. -// For tail calls this is the baseVarNum = 0. -// For non tail calls this is the outgoingArgSpace. -// -// Return value: -// None +// putArgStk - the GT_PUTARG_STK node // -void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) +// Notes: +// In the case of fixed out args, the caller must have set m_stkArgVarNum to the variable number +// corresponding to the argument area (where we will put the argument on the stack). +// For tail calls this is the baseVarNum = 0. +// For non tail calls this is the outgoingArgSpace. 
+void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) { - assert(treeNode->OperGet() == GT_PUTARG_STK); - assert(baseVarNum != BAD_VAR_NUM); - - var_types targetType = treeNode->TypeGet(); + var_types targetType = putArgStk->TypeGet(); if (varTypeIsSIMD(targetType)) { - regNumber srcReg = genConsumeReg(treeNode->gtGetOp1()); + regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1()); assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg))); - getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum, - treeNode->AsPutArgStk()->getArgOffset()); + genStoreRegToStackArg(targetType, srcReg, 0); return; } assert(targetType == TYP_STRUCT); - GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk(); if (putArgStk->gtNumberReferenceSlots == 0) { switch (putArgStk->gtPutArgStkKind) { - case GenTreePutArgStk::PutArgStkKindRepInstr: - genStructPutArgRepMovs(putArgStk, baseVarNum); + case GenTreePutArgStk::Kind::RepInstr: + genStructPutArgRepMovs(putArgStk); break; - case GenTreePutArgStk::PutArgStkKindUnroll: - genStructPutArgUnroll(putArgStk, baseVarNum); + case GenTreePutArgStk::Kind::Unroll: + genStructPutArgUnroll(putArgStk); + break; + case GenTreePutArgStk::Kind::Push: + genStructPutArgUnroll(putArgStk); break; default: unreached(); @@ -8775,108 +8026,150 @@ void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum) else { // No need to disable GC the way COPYOBJ does. Here the refs are copied in atomic operations always. + CLANG_FORMAT_COMMENT_ANCHOR; - // Consume these registers. - // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). - genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum); - GenTreePtr dstAddr = putArgStk; - GenTreePtr src = putArgStk->gtOp.gtOp1; - assert(src->OperGet() == GT_OBJ); - GenTreePtr srcAddr = src->gtGetOp1(); +#ifdef _TARGET_X86_ + // On x86, any struct that has contains GC references must be stored to the stack using `push` instructions so + // that the emitter properly detects the need to update the method's GC information. + // + // Strictly speaking, it is only necessary to use `push` to store the GC references themselves, so for structs + // with large numbers of consecutive non-GC-ref-typed fields, we may be able to improve the code size in the + // future. + assert(m_pushStkArg); - unsigned slots = putArgStk->gtNumSlots; + GenTree* srcAddr = putArgStk->gtGetOp1()->gtGetOp1(); + BYTE* gcPtrs = putArgStk->gtGcPtrs; + const unsigned numSlots = putArgStk->gtNumSlots; - // We are always on the stack we don't need to use the write barrier. - BYTE* gcPtrs = putArgStk->gtGcPtrs; - unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots; + regNumber srcRegNum = srcAddr->gtRegNum; + const bool srcAddrInReg = srcRegNum != REG_NA; - unsigned i = 0; - unsigned copiedSlots = 0; - while (i < slots) + unsigned srcLclNum = 0; + unsigned srcLclOffset = 0; + if (srcAddrInReg) { - switch (gcPtrs[i]) + genConsumeReg(srcAddr); + } + else + { + assert(srcAddr->OperIsLocalAddr()); + + srcLclNum = srcAddr->AsLclVarCommon()->gtLclNum; + if (srcAddr->OperGet() == GT_LCL_FLD_ADDR) { - case TYPE_GC_NONE: - // Let's see if we can use rep movsq instead of a sequence of movsq instructions - // to save cycles and code size. 
- { - unsigned nonGcSlotCount = 0; + srcLclOffset = srcAddr->AsLclFld()->gtLclOffs; + } + } - do - { - nonGcSlotCount++; - i++; - } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); + for (int i = numSlots - 1; i >= 0; --i) + { + emitAttr slotAttr; + if (gcPtrs[i] == TYPE_GC_NONE) + { + slotAttr = EA_4BYTE; + } + else if (gcPtrs[i] == TYPE_GC_REF) + { + slotAttr = EA_GCREF; + } + else + { + assert(gcPtrs[i] == TYPE_GC_BYREF); + slotAttr = EA_BYREF; + } - // If we have a very small contiguous non-gc region, it's better just to - // emit a sequence of movsq instructions - if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) - { - copiedSlots += nonGcSlotCount; - while (nonGcSlotCount > 0) - { - instGen(INS_movsq); - nonGcSlotCount--; - } - } - else - { - getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); - copiedSlots += nonGcSlotCount; - instGen(INS_r_movsq); - } - } - break; + const unsigned offset = i * 4; + if (srcAddrInReg) + { + getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset); + } + else + { + getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset); + } + genStackLevel += 4; + } +#else // !defined(_TARGET_X86_) + + // Consume these registers. + // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). + genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA); + + const bool srcIsLocal = putArgStk->gtOp1->AsObj()->gtOp1->OperIsLocalAddr(); + const emitAttr srcAddrAttr = srcIsLocal ? EA_PTRSIZE : EA_BYREF; + +#if DEBUG + unsigned numGCSlotsCopied = 0; +#endif // DEBUG + + BYTE* gcPtrs = putArgStk->gtGcPtrs; + const unsigned numSlots = putArgStk->gtNumSlots; + for (unsigned i = 0; i < numSlots;) + { + if (gcPtrs[i] == TYPE_GC_NONE) + { + // Let's see if we can use rep movsp (alias for movsd or movsq for 32 and 64 bits respectively) + // instead of a sequence of movsp instructions to save cycles and code size. + unsigned adjacentNonGCSlotCount = 0; + do + { + adjacentNonGCSlotCount++; + i++; + } while ((i < numSlots) && (gcPtrs[i] == TYPE_GC_NONE)); - case TYPE_GC_REF: // Is an object ref - case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it + // If we have a very small contiguous non-ref region, it's better just to + // emit a sequence of movsp instructions + if (adjacentNonGCSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { - // We have a GC (byref or ref) pointer - // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction, - // but the logic for emitting a GC info record is not available (it is internal for the emitter - // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do - // instGen(INS_movsq); and emission of gc info. - - var_types memType; - if (gcPtrs[i] == TYPE_GC_REF) - { - memType = TYP_REF; - } - else + for (; adjacentNonGCSlotCount > 0; adjacentNonGCSlotCount--) { - assert(gcPtrs[i] == TYPE_GC_BYREF); - memType = TYP_BYREF; + instGen(INS_movsp); } + } + else + { + getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, adjacentNonGCSlotCount); + instGen(INS_r_movsp); + } + } + else + { + assert((gcPtrs[i] == TYPE_GC_REF) || (gcPtrs[i] == TYPE_GC_BYREF)); + + // We have a GC (byref or ref) pointer + // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsp instruction, + // but the logic for emitting a GC info record is not available (it is internal for the emitter + // only.) See emitGCVarLiveUpd function. 
If we could call it separately, we could do + // instGen(INS_movsp); and emission of gc info. - getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0); - getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum, - ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE)); + var_types memType = (gcPtrs[i] == TYPE_GC_REF) ? TYP_REF : TYP_BYREF; + getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0); + genStoreRegToStackArg(memType, REG_RCX, i * TARGET_POINTER_SIZE); + +#ifdef DEBUG + numGCSlotsCopied++; +#endif // DEBUG + i++; + if (i < numSlots) + { // Source for the copy operation. // If a LocalAddr, use EA_PTRSIZE - copy from stack. // If not a LocalAddr, use EA_BYREF - the source location is not on the stack. - getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI, - TARGET_POINTER_SIZE); + getEmitter()->emitIns_R_I(INS_add, srcAddrAttr, REG_RSI, TARGET_POINTER_SIZE); // Always copying to the stack - outgoing arg area // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE. getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE); - copiedSlots++; - gcPtrCount--; - i++; } - break; - - default: - unreached(); - break; } } - assert(gcPtrCount == 0); + assert(numGCSlotsCopied == putArgStk->gtNumberReferenceSlots); +#endif // _TARGET_X86_ } } -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#endif // defined(FEATURE_PUT_STRUCT_ARG_STK) /***************************************************************************** * @@ -9043,7 +8336,7 @@ void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, return infoPtr; } -#else // !JIT32_GCENCODER +#else // !JIT32_GCENCODER void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr)) { IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC()); @@ -9061,7 +8354,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // Now we can actually use those slot ID's to declare live ranges. gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK); -#if defined(DEBUGGING_SUPPORT) if (compiler->opts.compDbgEnC) { // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp) @@ -9088,7 +8380,6 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // frame gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); } -#endif gcInfoEncoder->Build(); @@ -9203,18 +8494,33 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) assert(varDsc->TypeGet() == TYP_LONG); assert(!varDsc->lvPromoted); GenTreePtr op1 = treeNode->gtOp.gtOp1; - noway_assert(op1->OperGet() == GT_LONG); + noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG); genConsumeRegs(op1); - // Definitions of register candidates will have been lowered to 2 int lclVars. - assert(!treeNode->InReg()); + if (op1->OperGet() == GT_LONG) + { + // Definitions of register candidates will have been lowered to 2 int lclVars. + assert(!treeNode->InReg()); + + GenTreePtr loVal = op1->gtGetOp1(); + GenTreePtr hiVal = op1->gtGetOp2(); + + // NYI: Contained immediates. 
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), + "Store of long lclVar with contained immediate"); + + emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0); + emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT)); + } + else if (op1->OperGet() == GT_MUL_LONG) + { + assert((op1->gtFlags & GTF_MUL_64RSLT) != 0); - GenTreePtr loVal = op1->gtGetOp1(); - GenTreePtr hiVal = op1->gtGetOp2(); - // NYI: Contained immediates. - NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate"); - emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0); - emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT)); + // Stack store + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0); + getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum, + genTypeSize(TYP_INT)); + } } #endif // !defined(_TARGET_64BIT_) @@ -9332,57 +8638,6 @@ void CodeGen::genAmd64EmitterUnitTests() #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/***************************************************************************** - * genSetScopeInfo - * - * Called for every scope info piece to record by the main genSetScopeInfo() - */ - -void CodeGen::genSetScopeInfo(unsigned which, - UNATIVE_OFFSET startOffs, - UNATIVE_OFFSET length, - unsigned varNum, - unsigned LVnum, - bool avail, - Compiler::siVarLoc& varLoc) -{ - /* We need to do some mapping while reporting back these variables */ - - unsigned ilVarNum = compiler->compMap2ILvarNum(varNum); - noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM); - - VarName name = nullptr; - -#ifdef DEBUG - - for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++) - { - if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum) - { - name = compiler->info.compVarScopes[scopeNum].vsdName; - } - } - - // Hang on to this compiler->info. - - TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which]; - - tlvi.tlviVarNum = ilVarNum; - tlvi.tlviLVnum = LVnum; - tlvi.tlviName = name; - tlvi.tlviStartPC = startOffs; - tlvi.tlviLength = length; - tlvi.tlviAvailable = avail; - tlvi.tlviVarLoc = varLoc; - -#endif // DEBUG - - compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc); -} -#endif // DEBUGGING_SUPPORT - #endif // _TARGET_AMD64_ #endif // !LEGACY_BACKEND diff --git a/src/jit/compatjit/.gitmirror b/src/jit/compatjit/.gitmirror new file mode 100644 index 0000000000..f507630f94 --- /dev/null +++ b/src/jit/compatjit/.gitmirror @@ -0,0 +1 @@ +Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file diff --git a/src/jit/compatjit/CMakeLists.txt b/src/jit/compatjit/CMakeLists.txt new file mode 100644 index 0000000000..1e0615e431 --- /dev/null +++ b/src/jit/compatjit/CMakeLists.txt @@ -0,0 +1,66 @@ +project(compatjit) + +# This compatjit.dll is only built if we are not building JIT32 as compatjit.dll. +# It is the same build as legacyjit.dll, just with a different name, and not +# built as an altjit. + +add_definitions(-DLEGACY_BACKEND) + +add_definitions(-DFEATURE_NO_HOST) +add_definitions(-DSELF_NO_HOST) +add_definitions(-DFEATURE_READYTORUN_COMPILER) +remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE) + +# No SIMD in legacy back-end. +remove_definitions(-DFEATURE_SIMD) +remove_definitions(-DFEATURE_AVX_SUPPORT) + +if(WIN32) + add_definitions(-DFX_VER_INTERNALNAME_STR=compatjit.dll) +endif(WIN32) + +add_library_clr(compatjit + SHARED + ${SHARED_LIB_SOURCES} +) + +add_dependencies(compatjit jit_exports) + +set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) +set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) + +set(RYUJIT_LINK_LIBRARIES + utilcodestaticnohost + gcinfo +) + +if(CLR_CMAKE_PLATFORM_UNIX) + list(APPEND RYUJIT_LINK_LIBRARIES + mscorrc_debug + coreclrpal + palrt + ) +else() + list(APPEND RYUJIT_LINK_LIBRARIES + ${STATIC_MT_CRT_LIB} + ${STATIC_MT_VCRT_LIB} + kernel32.lib + advapi32.lib + ole32.lib + oleaut32.lib + uuid.lib + user32.lib + version.lib + shlwapi.lib + bcrypt.lib + crypt32.lib + RuntimeObject.lib + ) +endif(CLR_CMAKE_PLATFORM_UNIX) + +target_link_libraries(compatjit + ${RYUJIT_LINK_LIBRARIES} +) + +# add the install targets +install_clr(compatjit) diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index afbecdfc60..114847c0d0 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -48,6 +48,60 @@ bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false; AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr; #endif // ALT_JIT +/***************************************************************************** + * + * Little helpers to grab the current cycle counter value; this is done + * differently based on target architecture, host toolchain, etc. The + * main thing is to keep the overhead absolutely minimal; in fact, on + * x86/x64 we use RDTSC even though it's not thread-safe; GetThreadCycles + * (which is monotonous) is just too expensive. + */ +#ifdef FEATURE_JIT_METHOD_PERF + +#if defined(_HOST_X86_) || defined(_HOST_AMD64_) + +#if defined(_MSC_VER) + +#include <intrin.h> +inline bool _our_GetThreadCycles(unsigned __int64* cycleOut) +{ + *cycleOut = __rdtsc(); + return true; +} + +#elif defined(__clang__) + +inline bool _our_GetThreadCycles(unsigned __int64* cycleOut) +{ + uint64_t cycles; + asm volatile("rdtsc" : "=A"(cycles)); + *cycleOut = cycles; + return true; +} + +#else // neither _MSC_VER nor __clang__ + +// The following *might* work - might as well try. +#define _our_GetThreadCycles(cp) GetThreadCycles(cp) + +#endif + +#elif defined(_HOST_ARM_) || defined(_HOST_ARM64_) + +// If this doesn't work please see ../gc/gc.cpp for additional ARM +// info (and possible solutions). +#define _our_GetThreadCycles(cp) GetThreadCycles(cp) + +#else // not x86/x64 and not ARM + +// Don't know what this target is, but let's give it a try; if +// someone really wants to make this work, please add the right +// code here. 
+#define _our_GetThreadCycles(cp) GetThreadCycles(cp) + +#endif // which host OS + +#endif // FEATURE_JIT_METHOD_PERF /*****************************************************************************/ inline unsigned getCurTime() { @@ -147,8 +201,6 @@ void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek) void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP) { -#ifdef DEBUGGING_SUPPORT - static IPmappingDsc* nextMappingDsc; static unsigned lastLine; @@ -203,8 +255,6 @@ void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP) nextMappingDsc = nextMappingDsc->ipmdNext; } } - -#endif } /*****************************************************************************/ @@ -232,6 +282,15 @@ unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000, Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets); #endif // MEASURE_NODE_SIZE +/*****************************************************************************/ +#if MEASURE_MEM_ALLOC + +unsigned memSizeHistBuckets[] = {20, 50, 75, 100, 150, 250, 500, 1000, 5000, 0}; +Histogram memAllocHist(HostAllocator::getHostAllocator(), memSizeHistBuckets); +Histogram memUsedHist(HostAllocator::getHostAllocator(), memSizeHistBuckets); + +#endif // MEASURE_MEM_ALLOC + /***************************************************************************** * * Variables to keep track of total code amounts. @@ -475,7 +534,7 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd) for (;;) { // all of class chain must be of value type and must have only one field - if (!info.compCompHnd->isValueClass(clsHnd) && info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1) + if (!info.compCompHnd->isValueClass(clsHnd) || info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1) { return false; } @@ -1101,14 +1160,11 @@ size_t genFlowNodeCnt; #ifdef DEBUG /* static */ unsigned Compiler::s_compMethodsCount = 0; // to produce unique label names - -/* static */ -bool Compiler::s_dspMemStats = false; #endif -#ifndef DEBUGGING_SUPPORT +#if MEASURE_MEM_ALLOC /* static */ -const bool Compiler::Options::compDbgCode = false; +bool Compiler::s_dspMemStats = false; #endif #ifndef PROFILING_SUPPORTED @@ -1184,18 +1240,22 @@ void Compiler::compShutdown() } #endif +#if NODEBASH_STATS + GenTree::ReportOperBashing(jitstdout); +#endif + // Where should we write our statistics output? FILE* fout = jitstdout; #ifdef FEATURE_JIT_METHOD_PERF - if (compJitTimeLogFilename != NULL) + if (compJitTimeLogFilename != nullptr) { - // I assume that this will return NULL if it fails for some reason, and - // that... FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a")); - // ...Print will return silently with a NULL argument. 
- CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile); - fclose(jitTimeLogFile); + if (jitTimeLogFile != nullptr) + { + CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile); + fclose(jitTimeLogFile); + } } #endif // FEATURE_JIT_METHOD_PERF @@ -1214,6 +1274,63 @@ void Compiler::compShutdown() } #endif // COUNT_RANGECHECKS +#if COUNT_AST_OPERS + + // Add up all the counts so that we can show percentages of total + unsigned gtc = 0; + for (unsigned op = 0; op < GT_COUNT; op++) + gtc += GenTree::s_gtNodeCounts[op]; + + if (gtc > 0) + { + unsigned rem_total = gtc; + unsigned rem_large = 0; + unsigned rem_small = 0; + + unsigned tot_large = 0; + unsigned tot_small = 0; + + fprintf(fout, "\nGenTree operator counts (approximate):\n\n"); + + for (unsigned op = 0; op < GT_COUNT; op++) + { + unsigned siz = GenTree::s_gtTrueSizes[op]; + unsigned cnt = GenTree::s_gtNodeCounts[op]; + double pct = 100.0 * cnt / gtc; + + if (siz > TREE_NODE_SZ_SMALL) + tot_large += cnt; + else + tot_small += cnt; + + // Let's not show anything below a threshold + if (pct >= 0.5) + { + fprintf(fout, " GT_%-17s %7u (%4.1lf%%) %3u bytes each\n", GenTree::OpName((genTreeOps)op), cnt, + pct, siz); + rem_total -= cnt; + } + else + { + if (siz > TREE_NODE_SZ_SMALL) + rem_large += cnt; + else + rem_small += cnt; + } + } + if (rem_total > 0) + { + fprintf(fout, " All other GT_xxx ... %7u (%4.1lf%%) ... %4.1lf%% small + %4.1lf%% large\n", rem_total, + 100.0 * rem_total / gtc, 100.0 * rem_small / gtc, 100.0 * rem_large / gtc); + } + fprintf(fout, " -----------------------------------------------------\n"); + fprintf(fout, " Total ....... %11u --ALL-- ... %4.1lf%% small + %4.1lf%% large\n", gtc, + 100.0 * tot_small / gtc, 100.0 * tot_large / gtc); + fprintf(fout, "\n"); + } + +#endif // COUNT_AST_OPERS + #if DISPLAY_SIZES if (grossVMsize && grossNCsize) @@ -1367,17 +1484,23 @@ void Compiler::compShutdown() #if MEASURE_MEM_ALLOC -#ifdef DEBUG - // Under debug, we only dump memory stats when the COMPlus_* variable is defined. - // Under non-debug, we don't have the COMPlus_* variable, and we always dump it. if (s_dspMemStats) -#endif { fprintf(fout, "\nAll allocations:\n"); s_aggMemStats.Print(jitstdout); fprintf(fout, "\nLargest method:\n"); s_maxCompMemStats.Print(jitstdout); + + fprintf(fout, "\n"); + fprintf(fout, "---------------------------------------------------\n"); + fprintf(fout, "Distribution of total memory allocated per method (in KB):\n"); + memAllocHist.dump(fout); + + fprintf(fout, "\n"); + fprintf(fout, "---------------------------------------------------\n"); + fprintf(fout, "Distribution of total memory used per method (in KB):\n"); + memUsedHist.dump(fout); } #endif // MEASURE_MEM_ALLOC @@ -1452,100 +1575,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout) { #if MEASURE_NODE_SIZE - /* - IMPORTANT: Use the following code to check the alignment of - GenTree members (in a retail build, of course). 
- */ - - GenTree* gtDummy = nullptr; - - fprintf(fout, "\n"); - fprintf(fout, "Offset / size of gtOper = %2u / %2u\n", offsetof(GenTree, gtOper), sizeof(gtDummy->gtOper)); - fprintf(fout, "Offset / size of gtType = %2u / %2u\n", offsetof(GenTree, gtType), sizeof(gtDummy->gtType)); -#if FEATURE_ANYCSE - fprintf(fout, "Offset / size of gtCSEnum = %2u / %2u\n", offsetof(GenTree, gtCSEnum), - sizeof(gtDummy->gtCSEnum)); -#endif // FEATURE_ANYCSE -#if ASSERTION_PROP - fprintf(fout, "Offset / size of gtAssertionNum = %2u / %2u\n", offsetof(GenTree, gtAssertionNum), - sizeof(gtDummy->gtAssertionNum)); -#endif // ASSERTION_PROP -#if FEATURE_STACK_FP_X87 - fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl), - sizeof(gtDummy->gtFPlvl)); -#endif // FEATURE_STACK_FP_X87 - // TODO: The section that report GenTree sizes should be made into a public static member function of the GenTree - // class (see https://github.com/dotnet/coreclr/pull/493) - // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ), - // sizeof(gtDummy->_gtCostEx )); - // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ), - // sizeof(gtDummy->_gtCostSz )); - fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags), - sizeof(gtDummy->gtFlags)); - fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair), - sizeof(gtDummy->gtVNPair)); - fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs), - sizeof(gtDummy->gtRsvdRegs)); -#ifdef LEGACY_BACKEND - fprintf(fout, "Offset / size of gtUsedRegs = %2u / %2u\n", offsetof(GenTree, gtUsedRegs), - sizeof(gtDummy->gtUsedRegs)); -#endif // LEGACY_BACKEND -#ifndef LEGACY_BACKEND - fprintf(fout, "Offset / size of gtLsraInfo = %2u / %2u\n", offsetof(GenTree, gtLsraInfo), - sizeof(gtDummy->gtLsraInfo)); -#endif // !LEGACY_BACKEND - fprintf(fout, "Offset / size of gtNext = %2u / %2u\n", offsetof(GenTree, gtNext), sizeof(gtDummy->gtNext)); - fprintf(fout, "Offset / size of gtPrev = %2u / %2u\n", offsetof(GenTree, gtPrev), sizeof(gtDummy->gtPrev)); - fprintf(fout, "\n"); - -#if SMALL_TREE_NODES - fprintf(fout, "Small tree node size = %3u\n", TREE_NODE_SZ_SMALL); -#endif // SMALL_TREE_NODES - fprintf(fout, "Large tree node size = %3u\n", TREE_NODE_SZ_LARGE); - fprintf(fout, "Size of GenTree = %3u\n", sizeof(GenTree)); - fprintf(fout, "Size of GenTreeUnOp = %3u\n", sizeof(GenTreeUnOp)); - fprintf(fout, "Size of GenTreeOp = %3u\n", sizeof(GenTreeOp)); - fprintf(fout, "Size of GenTreeVal = %3u\n", sizeof(GenTreeVal)); - fprintf(fout, "Size of GenTreeIntConCommon = %3u\n", sizeof(GenTreeIntConCommon)); - fprintf(fout, "Size of GenTreePhysReg = %3u\n", sizeof(GenTreePhysReg)); -#ifndef LEGACY_BACKEND - fprintf(fout, "Size of GenTreeJumpTable = %3u\n", sizeof(GenTreeJumpTable)); -#endif // !LEGACY_BACKEND - fprintf(fout, "Size of GenTreeIntCon = %3u\n", sizeof(GenTreeIntCon)); - fprintf(fout, "Size of GenTreeLngCon = %3u\n", sizeof(GenTreeLngCon)); - fprintf(fout, "Size of GenTreeDblCon = %3u\n", sizeof(GenTreeDblCon)); - fprintf(fout, "Size of GenTreeStrCon = %3u\n", sizeof(GenTreeStrCon)); - fprintf(fout, "Size of GenTreeLclVarCommon = %3u\n", sizeof(GenTreeLclVarCommon)); - fprintf(fout, "Size of GenTreeLclVar = %3u\n", sizeof(GenTreeLclVar)); - fprintf(fout, "Size of GenTreeLclFld = %3u\n", sizeof(GenTreeLclFld)); - fprintf(fout, "Size of GenTreeRegVar = %3u\n", sizeof(GenTreeRegVar)); - fprintf(fout, "Size of 
GenTreeCast = %3u\n", sizeof(GenTreeCast)); - fprintf(fout, "Size of GenTreeBox = %3u\n", sizeof(GenTreeBox)); - fprintf(fout, "Size of GenTreeField = %3u\n", sizeof(GenTreeField)); - fprintf(fout, "Size of GenTreeArgList = %3u\n", sizeof(GenTreeArgList)); - fprintf(fout, "Size of GenTreeColon = %3u\n", sizeof(GenTreeColon)); - fprintf(fout, "Size of GenTreeCall = %3u\n", sizeof(GenTreeCall)); - fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg)); - fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal)); - fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark)); - fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic)); - fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex)); - fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen)); - fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk)); - fprintf(fout, "Size of GenTreeArrElem = %3u\n", sizeof(GenTreeArrElem)); - fprintf(fout, "Size of GenTreeAddrMode = %3u\n", sizeof(GenTreeAddrMode)); - fprintf(fout, "Size of GenTreeIndir = %3u\n", sizeof(GenTreeIndir)); - fprintf(fout, "Size of GenTreeStoreInd = %3u\n", sizeof(GenTreeStoreInd)); - fprintf(fout, "Size of GenTreeRetExpr = %3u\n", sizeof(GenTreeRetExpr)); - fprintf(fout, "Size of GenTreeStmt = %3u\n", sizeof(GenTreeStmt)); - fprintf(fout, "Size of GenTreeObj = %3u\n", sizeof(GenTreeObj)); - fprintf(fout, "Size of GenTreeClsVar = %3u\n", sizeof(GenTreeClsVar)); - fprintf(fout, "Size of GenTreeArgPlace = %3u\n", sizeof(GenTreeArgPlace)); - fprintf(fout, "Size of GenTreeLabel = %3u\n", sizeof(GenTreeLabel)); - fprintf(fout, "Size of GenTreePhiArg = %3u\n", sizeof(GenTreePhiArg)); - fprintf(fout, "Size of GenTreePutArgStk = %3u\n", sizeof(GenTreePutArgStk)); - fprintf(fout, "\n"); -#endif // MEASURE_NODE_SIZE + GenTree::DumpNodeSizes(fout); +#endif #if MEASURE_BLOCK_SIZE @@ -1572,8 +1603,6 @@ void Compiler::compDisplayStaticSizes(FILE* fout) sizeof(bbDummy->bbJumpDest)); fprintf(fout, "Offset / size of bbJumpSwt = %3u / %3u\n", offsetof(BasicBlock, bbJumpSwt), sizeof(bbDummy->bbJumpSwt)); - fprintf(fout, "Offset / size of bbTreeList = %3u / %3u\n", offsetof(BasicBlock, bbTreeList), - sizeof(bbDummy->bbTreeList)); fprintf(fout, "Offset / size of bbEntryState = %3u / %3u\n", offsetof(BasicBlock, bbEntryState), sizeof(bbDummy->bbEntryState)); fprintf(fout, "Offset / size of bbStkTempsIn = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsIn), @@ -1618,12 +1647,8 @@ void Compiler::compDisplayStaticSizes(FILE* fout) sizeof(bbDummy->bbHeapSsaNumIn)); fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut), sizeof(bbDummy->bbHeapSsaNumOut)); - -#ifdef DEBUGGING_SUPPORT fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope), sizeof(bbDummy->bbScope)); -#endif // DEBUGGING_SUPPORT - fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen), sizeof(bbDummy->bbCseGen)); fprintf(fout, "Offset / size of bbCseIn = %3u / %3u\n", offsetof(BasicBlock, bbCseIn), @@ -1888,10 +1913,6 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo) SIMDVectorHandle = nullptr; #endif -#ifdef DEBUG - inlRNG = nullptr; -#endif - compUsesThrowHelper = false; } @@ -2244,14 +2265,14 @@ const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs) void Compiler::compSetProcessor() { - unsigned compileFlags = opts.eeFlags; + const JitFlags& jitFlags = *opts.jitFlags; #if 
defined(_TARGET_ARM_) info.genCPU = CPU_ARM; #elif defined(_TARGET_AMD64_) - info.genCPU = CPU_X64; + info.genCPU = CPU_X64; #elif defined(_TARGET_X86_) - if (compileFlags & CORJIT_FLG_TARGET_P4) + if (jitFlags.IsSet(JitFlags::JIT_FLAG_TARGET_P4)) info.genCPU = CPU_X86_PENTIUM_4; else info.genCPU = CPU_X86; @@ -2262,33 +2283,66 @@ void Compiler::compSetProcessor() // CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef _TARGET_AMD64_ - opts.compUseFCOMI = false; - opts.compUseCMOV = true; - opts.compCanUseSSE2 = true; +#ifdef _TARGET_XARCH_ + opts.compCanUseSSE3_4 = false; + if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE3_4)) + { + if (JitConfig.EnableSSE3_4() != 0) + { + opts.compCanUseSSE3_4 = true; + } + } #ifdef FEATURE_AVX_SUPPORT // COMPlus_EnableAVX can be used to disable using AVX if available on a target machine. // Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit opts.compCanUseAVX = false; - if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0)) + if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2)) { if (JitConfig.EnableAVX() != 0) { opts.compCanUseAVX = true; - if (!compIsForInlining()) - { - codeGen->getEmitter()->SetUseAVX(true); - } } } -#endif -#endif //_TARGET_AMD64_ +#endif // FEATURE_AVX_SUPPORT -#ifdef _TARGET_X86_ - opts.compUseFCOMI = ((opts.eeFlags & CORJIT_FLG_USE_FCOMI) != 0); - opts.compUseCMOV = ((opts.eeFlags & CORJIT_FLG_USE_CMOV) != 0); - opts.compCanUseSSE2 = ((opts.eeFlags & CORJIT_FLG_USE_SSE2) != 0); + if (!compIsForInlining()) + { +#ifdef FEATURE_AVX_SUPPORT + if (opts.compCanUseAVX) + { + codeGen->getEmitter()->SetUseAVX(true); + } + else +#endif // FEATURE_AVX_SUPPORT + if (opts.compCanUseSSE3_4) + { + codeGen->getEmitter()->SetUseSSE3_4(true); + } + } +#endif // _TARGET_XARCH_ + +#ifdef _TARGET_AMD64_ + opts.compUseFCOMI = false; + opts.compUseCMOV = true; + opts.compCanUseSSE2 = true; +#elif defined(_TARGET_X86_) + opts.compUseFCOMI = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_FCOMI); + opts.compUseCMOV = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_CMOV); + opts.compCanUseSSE2 = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE2); + +#if !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR) + // RyuJIT/x86 requires SSE2 to be available: there is no support for generating floating-point + // code with x87 instructions. On .NET Core, the VM always tells us that SSE2 is available. + // However, on desktop, under ngen, (and presumably in the unlikely case you're actually + // running on a machine without SSE2), the VM does not set the SSE2 flag. We ignore this and + // go ahead and generate SSE2 code anyway. + if (!opts.compCanUseSSE2) + { + JITDUMP("VM didn't set CORJIT_FLG_USE_SSE2! Ignoring, and generating SSE2 code anyway.\n"); + opts.compCanUseSSE2 = true; + } +#endif // !defined(LEGACY_BACKEND) && !defined(FEATURE_CORECLR) #ifdef DEBUG if (opts.compUseFCOMI) @@ -2296,7 +2350,9 @@ void Compiler::compSetProcessor() if (opts.compUseCMOV) opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50); - // Should we override the SSE2 setting +#ifdef LEGACY_BACKEND + + // Should we override the SSE2 setting? enum { SSE2_FORCE_DISABLE = 0, @@ -2310,7 +2366,17 @@ void Compiler::compSetProcessor() opts.compCanUseSSE2 = true; else if (opts.compCanUseSSE2) opts.compCanUseSSE2 = !compStressCompile(STRESS_GENERIC_VARN, 50); + +#else // !LEGACY_BACKEND + + // RyuJIT/x86 requires SSE2 to be available and hence + // don't turn off compCanUseSSE2 under stress. 
+ assert(opts.compCanUseSSE2); + +#endif // !LEGACY_BACKEND + #endif // DEBUG + #endif // _TARGET_X86_ } @@ -2378,31 +2444,36 @@ unsigned ReinterpretHexAsDecimal(unsigned in) return result; } -void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) +void Compiler::compInitOptions(JitFlags* jitFlags) { #ifdef UNIX_AMD64_ABI opts.compNeedToAlignFrame = false; #endif // UNIX_AMD64_ABI memset(&opts, 0, sizeof(opts)); - unsigned compileFlags = jitFlags->corJitFlags; - if (compIsForInlining()) { - assert((compileFlags & CORJIT_FLG_LOST_WHEN_INLINING) == 0); - assert(compileFlags & CORJIT_FLG_SKIP_VERIFICATION); + // The following flags are lost when inlining. (They are removed in + // Compiler::fgInvokeInlineeCompiler().) + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)); + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR)); + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE)); + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC)); + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO)); + + assert(jitFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION)); } opts.jitFlags = jitFlags; - opts.eeFlags = compileFlags; opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization - if (opts.eeFlags & (CORJIT_FLG_DEBUG_CODE | CORJIT_FLG_MIN_OPT)) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)) { opts.compFlags = CLFLG_MINOPT; } // Don't optimize .cctors (except prejit) or if we're an inlinee - else if (!(opts.eeFlags & CORJIT_FLG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && !compIsForInlining()) + else if (!jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && + !compIsForInlining()) { opts.compFlags = CLFLG_MINOPT; } @@ -2414,32 +2485,31 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) // If the EE sets SIZE_OPT or if we are compiling a Class constructor // we will optimize for code size at the expense of speed // - if ((opts.eeFlags & CORJIT_FLG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR)) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR)) { opts.compCodeOpt = SMALL_CODE; } // // If the EE sets SPEED_OPT we will optimize for speed at the expense of code size // - else if (opts.eeFlags & CORJIT_FLG_SPEED_OPT) + else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT)) { opts.compCodeOpt = FAST_CODE; - assert((opts.eeFlags & CORJIT_FLG_SIZE_OPT) == 0); + assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT)); } -//------------------------------------------------------------------------- + //------------------------------------------------------------------------- + + opts.compDbgCode = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE); + opts.compDbgInfo = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_INFO); + opts.compDbgEnC = jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC); -#ifdef DEBUGGING_SUPPORT - opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0; - opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0; - opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0; #if REGEN_SHORTCUTS || REGEN_CALLPAT // We never want to have debugging enabled when regenerating GC encoding patterns opts.compDbgCode = false; opts.compDbgInfo = false; opts.compDbgEnC = false; #endif -#endif compSetProcessor(); @@ -2473,7 +2543,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) #ifdef DEBUG const JitConfigValues::MethodSet* pfAltJit; - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if 
(jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { pfAltJit = &JitConfig.AltJitNgen(); } @@ -2498,7 +2568,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) #else // !DEBUG const char* altJitVal; - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { altJitVal = JitConfig.AltJitNgen().list(); } @@ -2602,7 +2672,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) // if (!compIsForInlining()) { - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args)) { @@ -2952,10 +3022,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) #endif // DEBUG #ifdef FEATURE_SIMD -#ifdef _TARGET_AMD64_ - // Minimum bar for availing SIMD benefits is SSE2 on AMD64. - featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0); -#endif // _TARGET_AMD64_ + // Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86. + featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD); #endif // FEATURE_SIMD if (compIsForInlining() || compIsForImportOnly()) @@ -2978,23 +3046,26 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) opts.compTailCallLoopOpt = true; #endif -#ifdef DEBUG - opts.dspInstrs = false; - opts.dspEmit = false; - opts.dspLines = false; - opts.varNames = false; - opts.dmpHex = false; - opts.disAsm = false; - opts.disAsmSpilled = false; - opts.disDiffable = false; - opts.dspCode = false; - opts.dspEHTable = false; - opts.dspGCtbls = false; - opts.disAsm2 = false; - opts.dspUnwind = false; - s_dspMemStats = false; - opts.compLongAddress = false; +#ifdef PROFILING_SUPPORTED opts.compJitELTHookEnabled = false; +#endif // PROFILING_SUPPORTED + +#ifdef DEBUG + opts.dspInstrs = false; + opts.dspEmit = false; + opts.dspLines = false; + opts.varNames = false; + opts.dmpHex = false; + opts.disAsm = false; + opts.disAsmSpilled = false; + opts.disDiffable = false; + opts.dspCode = false; + opts.dspEHTable = false; + opts.dspGCtbls = false; + opts.disAsm2 = false; + opts.dspUnwind = false; + opts.compLongAddress = false; + opts.optRepeat = false; #ifdef LATE_DISASM opts.doLateDisasm = false; @@ -3007,7 +3078,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) // if (!altJitConfig || opts.altJit) { - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { if ((JitConfig.NgenOrder() & 1) == 1) { @@ -3084,14 +3155,14 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) opts.dspDiffable = true; } - if (JitConfig.DisplayMemStats() != 0) + if (JitConfig.JitLongAddress() != 0) { - s_dspMemStats = true; + opts.compLongAddress = true; } - if (JitConfig.JitLongAddress() != 0) + if (JitConfig.JitOptRepeat().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args)) { - opts.compLongAddress = true; + opts.optRepeat = true; } } @@ -3152,7 +3223,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) //------------------------------------------------------------------------- -#ifdef DEBUGGING_SUPPORT #ifdef DEBUG assert(!codeGen->isGCTypeFixed()); opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5); @@ -3173,11 +3243,15 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0; #endif +#if MEASURE_MEM_ALLOC + s_dspMemStats = (JitConfig.DisplayMemStats() != 0); +#endif + #ifdef PROFILING_SUPPORTED - opts.compNoPInvokeInlineCB = 
(opts.eeFlags & CORJIT_FLG_PROF_NO_PINVOKE_INLINE) ? true : false; + opts.compNoPInvokeInlineCB = jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_NO_PINVOKE_INLINE); // Cache the profiler handle - if (opts.eeFlags & CORJIT_FLG_PROF_ENTERLEAVE) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE)) { BOOL hookNeeded; BOOL indirected; @@ -3192,11 +3266,8 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) compProfilerMethHndIndirected = false; } -#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_) - // Right now this ELT hook option is enabled only for arm and amd64 - - // Honour complus_JitELTHookEnabled only if VM has not asked us to generate profiler - // hooks in the first place. That is, Override VM only if it hasn't asked for a + // Honour COMPlus_JitELTHookEnabled only if VM has not asked us to generate profiler + // hooks in the first place. That is, override VM only if it hasn't asked for a // profiler callback for this method. if (!compProfilerHookNeeded && (JitConfig.JitELTHookEnabled() != 0)) { @@ -3209,7 +3280,6 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) compProfilerMethHnd = (void*)DummyProfilerELTStub; compProfilerMethHndIndirected = false; } -#endif // _TARGET_ARM_ || _TARGET_AMD64_ #endif // PROFILING_SUPPORTED @@ -3226,10 +3296,9 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) } #endif - opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false; + opts.compMustInlinePInvokeCalli = jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB); opts.compScopeInfo = opts.compDbgInfo; -#endif // DEBUGGING_SUPPORT #ifdef LATE_DISASM codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName, @@ -3239,7 +3308,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) //------------------------------------------------------------------------- #if RELOC_SUPPORT - opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false; + opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC); #endif #ifdef DEBUG @@ -3249,7 +3318,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) #endif #endif // DEBUG - opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? 
true : false; + opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT); #ifdef _TARGET_ARM64_ // TODO-ARM64-NYI: enable hot/cold splitting @@ -3294,7 +3363,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) fgProfileBuffer = nullptr; fgProfileData_ILSizeMismatch = false; fgNumProfileRuns = 0; - if (opts.eeFlags & CORJIT_FLG_BBOPT) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT)) { assert(!compIsForInlining()); HRESULT hr; @@ -3365,7 +3434,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting)); printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH)); - if ((opts.eeFlags & CORJIT_FLG_BBOPT) && fgHaveProfileData()) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) { printf("OPTIONS: using real profile data\n"); } @@ -3375,7 +3444,7 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) printf("OPTIONS: discarded IBC profile data due to mismatch in ILSize\n"); } - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { printf("OPTIONS: Jit invoked for ngen\n"); } @@ -3384,11 +3453,11 @@ void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags) #endif opts.compGCPollType = GCPOLL_NONE; - if (opts.eeFlags & CORJIT_FLG_GCPOLL_CALLS) + if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_CALLS)) { opts.compGCPollType = GCPOLL_CALL; } - else if (opts.eeFlags & CORJIT_FLG_GCPOLL_INLINE) + else if (jitFlags->IsSet(JitFlags::JIT_FLAG_GCPOLL_INLINE)) { // make sure that the EE didn't set both flags. assert(opts.compGCPollType == GCPOLL_NONE); @@ -3568,14 +3637,11 @@ void Compiler::compInitDebuggingInfo() info.compVarScopesCount = 0; -#ifdef DEBUGGING_SUPPORT if (opts.compScopeInfo) -#endif { eeGetVars(); } -#ifdef DEBUGGING_SUPPORT compInitVarScopeMap(); if (opts.compScopeInfo || opts.compDbgCode) @@ -3598,7 +3664,6 @@ void Compiler::compInitDebuggingInfo() JITDUMP("Debuggable code - Add new BB%02u to perform initialization of variables [%08X]\n", fgFirstBB->bbNum, dspPtr(fgFirstBB)); } -#endif // DEBUGGING_SUPPORT /*------------------------------------------------------------------------- * @@ -3617,9 +3682,7 @@ void Compiler::compInitDebuggingInfo() info.compStmtOffsetsCount = 0; -#ifdef DEBUGGING_SUPPORT if (opts.compDbgInfo) -#endif { /* Get hold of the line# records, if there are any */ @@ -3661,12 +3724,9 @@ void Compiler::compInitDebuggingInfo() void Compiler::compSetOptimizationLevel() { - unsigned compileFlags; bool theMinOptsValue; unsigned jitMinOpts; - compileFlags = opts.eeFlags; - if (compIsForInlining()) { theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts(); @@ -3757,13 +3817,40 @@ void Compiler::compSetOptimizationLevel() } } +#if 0 + // The code in this #if can be used to debug optimization issues according to method hash. + // To use, uncomment, rebuild and set environment variables minoptshashlo and minoptshashhi. 
+#ifdef DEBUG + unsigned methHash = info.compMethodHash(); + char* lostr = getenv("minoptshashlo"); + unsigned methHashLo = 0; + if (lostr != nullptr) + { + sscanf_s(lostr, "%x", &methHashLo); + char* histr = getenv("minoptshashhi"); + unsigned methHashHi = UINT32_MAX; + if (histr != nullptr) + { + sscanf_s(histr, "%x", &methHashHi); + if (methHash >= methHashLo && methHash <= methHashHi) + { + printf("MinOpts for method %s, hash = 0x%x.\n", + info.compFullName, info.compMethodHash()); + printf(""); // in our logic this causes a flush + theMinOptsValue = true; + } + } + } +#endif +#endif + if (compStressCompile(STRESS_MIN_OPTS, 5)) { theMinOptsValue = true; } // For PREJIT we never drop down to MinOpts // unless unless CLFLG_MINOPT is set - else if (!(compileFlags & CORJIT_FLG_PREJIT)) + else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize) { @@ -3805,7 +3892,7 @@ void Compiler::compSetOptimizationLevel() // Retail check if we should force Minopts due to the complexity of the method // For PREJIT we never drop down to MinOpts // unless unless CLFLG_MINOPT is set - if (!theMinOptsValue && !(compileFlags & CORJIT_FLG_PREJIT) && + if (!theMinOptsValue && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && ((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) || (DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) || (DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount))) @@ -3828,14 +3915,14 @@ void Compiler::compSetOptimizationLevel() unsigned methHash = info.compMethodHash(); char* lostr = getenv("opthashlo"); unsigned methHashLo = 0; - if (lostr != NULL) + if (lostr != NULL) { sscanf_s(lostr, "%x", &methHashLo); // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers. } char* histr = getenv("opthashhi"); unsigned methHashHi = UINT32_MAX; - if (histr != NULL) + if (histr != NULL) { sscanf_s(histr, "%x", &methHashHi); // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. @@ -3883,27 +3970,27 @@ _SetMinOpts: } #if !defined(_TARGET_AMD64_) - // The VM sets CORJIT_FLG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or + // The VM sets JitFlags::JIT_FLAG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or // (2) the function is marked "noinline". The reason for #2 is that people mark functions // noinline to ensure the show up on in a stack walk. But for AMD64, we don't need a frame // pointer for the frame to show up in stack walk. - if (compileFlags & CORJIT_FLG_FRAMED) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_FRAMED)) codeGen->setFrameRequired(true); #endif - if (compileFlags & CORJIT_FLG_RELOC) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC)) { codeGen->genAlignLoops = false; // loop alignment not supported for prejitted code - // The zapper doesn't set CORJIT_FLG_ALIGN_LOOPS, and there is + // The zapper doesn't set JitFlags::JIT_FLAG_ALIGN_LOOPS, and there is // no reason for it to set it as the JIT doesn't currently support loop alignment // for prejitted images. (The JIT doesn't know the final address of the code, hence // it can't align code based on unknown addresses.) 
- assert((compileFlags & CORJIT_FLG_ALIGN_LOOPS) == 0); + assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS)); } else { - codeGen->genAlignLoops = (compileFlags & CORJIT_FLG_ALIGN_LOOPS) != 0; + codeGen->genAlignLoops = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_ALIGN_LOOPS); } } @@ -4075,7 +4162,7 @@ void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, b // For an overview of the structure of the JIT, see: // https://github.com/dotnet/coreclr/blob/master/Documentation/botr/ryujit-overview.md // -void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags) +void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags) { if (compIsForInlining()) { @@ -4112,26 +4199,36 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F fgRemovePreds(); } + EndPhase(PHASE_IMPORTATION); + if (compIsForInlining()) { /* Quit inlining if fgImport() failed for any reason. */ - if (compDonotInline()) + if (!compDonotInline()) { - return; + /* Filter out unimported BBs */ + + fgRemoveEmptyBlocks(); } - /* Filter out unimported BBs */ + EndPhase(PHASE_POST_IMPORT); - fgRemoveEmptyBlocks(); +#ifdef FEATURE_JIT_METHOD_PERF + if (pCompJitTimer != nullptr) + { +#if MEASURE_CLRAPI_CALLS + EndPhase(PHASE_CLR_API); +#endif + pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, false); + } +#endif return; } assert(!compDonotInline()); - EndPhase(PHASE_IMPORTATION); - // Maybe the caller was not interested in generating code if (compIsForImportOnly()) { @@ -4145,7 +4242,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F fgRemoveEH(); #endif // !FEATURE_EH - if (compileFlags->corJitFlags & CORJIT_FLG_BBINSTR) + if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR)) { fgInstrumentMethod(); } @@ -4180,7 +4277,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F /* Massage the trees so that we can generate code out of them */ fgMorph(); - EndPhase(PHASE_MORPH); + EndPhase(PHASE_MORPH_END); /* GS security checks for unsafe buffers */ if (getNeedsGSSecurityCookie()) @@ -4336,6 +4433,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F bool doCopyProp = true; bool doAssertionProp = true; bool doRangeAnalysis = true; + int iterations = 1; #ifdef DEBUG doSsa = (JitConfig.JitDoSsa() != 0); @@ -4345,72 +4443,88 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0); doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0); doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0); -#endif - if (doSsa) + if (opts.optRepeat) { - fgSsaBuild(); - EndPhase(PHASE_BUILD_SSA); + iterations = JitConfig.JitOptRepeatCount(); } +#endif - if (doEarlyProp) + while (iterations > 0) { - /* Propagate array length and rewrite getType() method call */ - optEarlyProp(); - EndPhase(PHASE_EARLY_PROP); - } + if (doSsa) + { + fgSsaBuild(); + EndPhase(PHASE_BUILD_SSA); + } - if (doValueNum) - { - fgValueNumber(); - EndPhase(PHASE_VALUE_NUMBER); - } + if (doEarlyProp) + { + /* Propagate array length and rewrite getType() method call */ + optEarlyProp(); + EndPhase(PHASE_EARLY_PROP); + } - if (doLoopHoisting) - { - /* Hoist invariant code out of loops */ - optHoistLoopCode(); - EndPhase(PHASE_HOIST_LOOP_CODE); - } + if (doValueNum) + { + fgValueNumber(); + EndPhase(PHASE_VALUE_NUMBER); + } - if 
(doCopyProp) - { - /* Perform VN based copy propagation */ - optVnCopyProp(); - EndPhase(PHASE_VN_COPY_PROP); - } + if (doLoopHoisting) + { + /* Hoist invariant code out of loops */ + optHoistLoopCode(); + EndPhase(PHASE_HOIST_LOOP_CODE); + } + + if (doCopyProp) + { + /* Perform VN based copy propagation */ + optVnCopyProp(); + EndPhase(PHASE_VN_COPY_PROP); + } #if FEATURE_ANYCSE - /* Remove common sub-expressions */ - optOptimizeCSEs(); + /* Remove common sub-expressions */ + optOptimizeCSEs(); #endif // FEATURE_ANYCSE #if ASSERTION_PROP - if (doAssertionProp) - { - /* Assertion propagation */ - optAssertionPropMain(); - EndPhase(PHASE_ASSERTION_PROP_MAIN); - } + if (doAssertionProp) + { + /* Assertion propagation */ + optAssertionPropMain(); + EndPhase(PHASE_ASSERTION_PROP_MAIN); + } - if (doRangeAnalysis) - { - /* Optimize array index range checks */ - RangeCheck rc(this); - rc.OptimizeRangeChecks(); - EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS); - } + if (doRangeAnalysis) + { + /* Optimize array index range checks */ + RangeCheck rc(this); + rc.OptimizeRangeChecks(); + EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS); + } #endif // ASSERTION_PROP - /* update the flowgraph if we modified it during the optimization phase*/ - if (fgModified) - { - fgUpdateFlowGraph(); - EndPhase(PHASE_UPDATE_FLOW_GRAPH); + /* update the flowgraph if we modified it during the optimization phase*/ + if (fgModified) + { + fgUpdateFlowGraph(); + EndPhase(PHASE_UPDATE_FLOW_GRAPH); + + // Recompute the edge weight if we have modified the flow graph + fgComputeEdgeWeights(); + EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2); + } - // Recompute the edge weight if we have modified the flow graph - fgComputeEdgeWeights(); - EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2); + // Iterate if requested, resetting annotations first. + if (--iterations == 0) + { + break; + } + ResetOptAnnotations(); + RecomputeLoopInfo(); } } @@ -4540,7 +4654,12 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F #ifdef FEATURE_JIT_METHOD_PERF if (pCompJitTimer) - pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary); + { +#if MEASURE_CLRAPI_CALLS + EndPhase(PHASE_CLR_API); +#endif + pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary, true); + } #endif RecordStateAtEndOfCompilation(); @@ -4569,6 +4688,82 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F #endif // FUNC_INFO_LOGGING } +//------------------------------------------------------------------------ +// ResetOptAnnotations: Clear annotations produced during global optimizations. +// +// Notes: +// The intent of this method is to clear any information typically assumed +// to be set only once; it is used between iterations when JitOptRepeat is +// in effect. 
+ +void Compiler::ResetOptAnnotations() +{ + assert(opts.optRepeat); + assert(JitConfig.JitOptRepeatCount() > 0); + fgResetForSsa(); + vnStore = nullptr; + m_opAsgnVarDefSsaNums = nullptr; + m_blockToEHPreds = nullptr; + fgSsaPassesCompleted = 0; + fgVNPassesCompleted = 0; + + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt()) + { + stmt->gtFlags &= ~GTF_STMT_HAS_CSE; + + for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext) + { + tree->ClearVN(); + tree->ClearAssertion(); + tree->gtCSEnum = NO_CSE; + + // Clear any *_ASG_LHS flags -- these are set during SSA construction, + // and the heap live-in calculation depends on them being unset coming + // into SSA construction (without clearing them, a block that has a + // heap def via one of these before any heap use is treated as not having + // an upwards-exposed heap use, even though subsequent heap uses may not + // be killed by the store; this seems to be a bug, worked around here). + if (tree->OperIsIndir()) + { + tree->gtFlags &= ~GTF_IND_ASG_LHS; + } + else if (tree->OperGet() == GT_CLS_VAR) + { + tree->gtFlags &= ~GTF_CLS_VAR_ASG_LHS; + } + } + } + } +} + +//------------------------------------------------------------------------ +// RecomputeLoopInfo: Recompute loop annotations between opt-repeat iterations. +// +// Notes: +// The intent of this method is to update loop structure annotations, and those +// they depend on; these annotations may have become stale during optimization, +// and need to be up-to-date before running another iteration of optimizations. + +void Compiler::RecomputeLoopInfo() +{ + assert(opts.optRepeat); + assert(JitConfig.JitOptRepeatCount() > 0); + // Recompute reachability sets, dominators, and loops. + optLoopCount = 0; + fgDomsComputed = false; + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + block->bbFlags &= ~BBF_LOOP_FLAGS; + } + fgComputeReachability(); + // Rebuild the loop tree annotations themselves. Since this is performed as + // part of 'optOptimizeLoops', this will also re-perform loop rotation, but + // not other optimizations, as the others are not part of 'optOptimizeLoops'. + optOptimizeLoops(); +} + /*****************************************************************************/ void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo) { @@ -4696,11 +4891,13 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags) + JitFlags* compileFlags) { #ifdef FEATURE_JIT_METHOD_PERF static bool checkedForJitTimeLog = false; + pCompJitTimer = nullptr; + if (!checkedForJitTimeLog) { // Call into VM to get the config strings. 
FEATURE_JIT_METHOD_PERF is enabled for @@ -4713,14 +4910,10 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, checkedForJitTimeLog = true; } - if ((Compiler::compJitTimeLogFilename != NULL) || (JitTimeLogCsv() != NULL)) + if ((Compiler::compJitTimeLogFilename != nullptr) || (JitTimeLogCsv() != nullptr)) { pCompJitTimer = JitTimer::Create(this, methodInfo->ILCodeSize); } - else - { - pCompJitTimer = NULL; - } #endif // FEATURE_JIT_METHOD_PERF #ifdef DEBUG @@ -4862,7 +5055,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, // Set this before the first 'BADCODE' // Skip verification where possible - tiVerificationNeeded = (compileFlags->corJitFlags & CORJIT_FLG_SKIP_VERIFICATION) == 0; + tiVerificationNeeded = !compileFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION); assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified. @@ -4893,8 +5086,8 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, case CORINFO_VERIFICATION_CAN_SKIP: // The VM should first verify the open instantiation. If unverifiable code - // is detected, it should pass in CORJIT_FLG_SKIP_VERIFICATION. - assert(!"The VM should have used CORJIT_FLG_SKIP_VERIFICATION"); + // is detected, it should pass in JitFlags::JIT_FLAG_SKIP_VERIFICATION. + assert(!"The VM should have used JitFlags::JIT_FLAG_SKIP_VERIFICATION"); tiVerificationNeeded = false; break; @@ -4933,7 +5126,7 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd, CORINFO_METHOD_INFO* methodInfo; void** methodCodePtr; ULONG* methodCodeSize; - CORJIT_FLAGS* compileFlags; + JitFlags* compileFlags; CorInfoInstantiationVerification instVerInfo; int result; @@ -5000,6 +5193,8 @@ void Compiler::compCompileFinish() // Make the updates. genMemStats.nraTotalSizeAlloc = compGetAllocator()->getTotalBytesAllocated(); genMemStats.nraTotalSizeUsed = compGetAllocator()->getTotalBytesUsed(); + memAllocHist.record((unsigned)((genMemStats.nraTotalSizeAlloc + 1023) / 1024)); + memUsedHist.record((unsigned)((genMemStats.nraTotalSizeUsed + 1023) / 1024)); s_aggMemStats.Add(genMemStats); if (genMemStats.allocSz > s_maxCompMemStats.allocSz) { @@ -5038,6 +5233,7 @@ void Compiler::compCompileFinish() // the prolog which requires memory (info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc (getJitStressLevel() == 0) && // We need extra memory for stress + !opts.optRepeat && // We need extra memory to repeat opts !compAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for // DirectAlloc (compAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) && @@ -5071,7 +5267,7 @@ void Compiler::compCompileFinish() mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd); unsigned profCallCount = 0; - if (((opts.eeFlags & CORJIT_FLG_BBOPT) != 0) && fgHaveProfileData()) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT) && fgHaveProfileData()) { assert(fgProfileBuffer[0].ILOffset == 0); profCallCount = fgProfileBuffer[0].ExecutionCount; @@ -5208,7 +5404,7 @@ void Compiler::compCompileFinish() // For ngen the int3 or breakpoint instruction will be right at the // start of the ngen method and we will stop when we execute it. 
// - if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0) + if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { if (compJitHaltMethod()) { @@ -5296,7 +5492,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags, + JitFlags* compileFlags, CorInfoInstantiationVerification instVerInfo) { CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd; @@ -5438,7 +5634,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, info.compIsContextful = (info.compClassAttr & CORINFO_FLG_CONTEXTFUL) != 0; - info.compPublishStubParam = (opts.eeFlags & CORJIT_FLG_PUBLISH_SECRET_PARAM) != 0; + info.compPublishStubParam = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM); switch (methodInfo->args.getCallConv()) { @@ -5476,7 +5672,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE); - if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT)) + if (!compIsForInlining() && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { // We're prejitting the root method. We also will analyze it as // a potential inline candidate. @@ -5644,10 +5840,6 @@ _Next: return CORJIT_OK; } -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/*****************************************************************************/ - //------------------------------------------------------------------------ // compFindLocalVarLinear: Linear search for variable's scope containing offset. // @@ -5992,11 +6184,7 @@ void Compiler::compProcessScopesUntil(unsigned offset, } while (foundExit || foundEnter); } -/*****************************************************************************/ -#endif // DEBUGGING_SUPPORT -/*****************************************************************************/ - -#if defined(DEBUGGING_SUPPORT) && defined(DEBUG) +#if defined(DEBUG) void Compiler::compDispScopeLists() { @@ -6044,10 +6232,6 @@ void Compiler::compDispScopeLists() } } -#endif - -#if defined(DEBUG) - void Compiler::compDispLocalVars() { printf("info.compVarScopesCount = %d\n", info.compVarScopesCount); @@ -6066,7 +6250,66 @@ void Compiler::compDispLocalVars() } } -#endif +#endif // DEBUG + +/*****************************************************************************/ + +#if MEASURE_CLRAPI_CALLS + +struct WrapICorJitInfo : public ICorJitInfo +{ + //------------------------------------------------------------------------ + // WrapICorJitInfo::makeOne: allocate an instance of WrapICorJitInfo + // + // Arguments: + // alloc - the allocator to get memory from for the instance + // compile - the compiler instance + // compHndRef - the ICorJitInfo handle from the EE; the caller's + // copy may be replaced with a "wrapper" instance + // + // Return Value: + // If the config flags indicate that ICorJitInfo should be wrapped, + // we return the "wrapper" instance; otherwise we return "nullptr". + + static WrapICorJitInfo* makeOne(ArenaAllocator* alloc, Compiler* compiler, COMP_HANDLE& compHndRef /* INOUT */) + { + WrapICorJitInfo* wrap = nullptr; + + if (JitConfig.JitEECallTimingInfo() != 0) + { + // It's too early to use the default allocator, so we do this + // in two steps to be safe (the constructor doesn't need to do + // anything except fill in the vtable pointer, so we let the + // compiler do it). 
+ void* inst = alloc->allocateMemory(roundUp(sizeof(WrapICorJitInfo))); + if (inst != nullptr) + { + // If you get a build error here due to 'WrapICorJitInfo' being + // an abstract class, it's very likely that the wrapper bodies + // in ICorJitInfo_API_wrapper.hpp are no longer in sync with + // the EE interface; please be kind and update the header file. + wrap = new (inst, jitstd::placement_t()) WrapICorJitInfo(); + + wrap->wrapComp = compiler; + + // Save the real handle and replace it with our wrapped version. + wrap->wrapHnd = compHndRef; + compHndRef = wrap; + } + } + + return wrap; + } + +private: + Compiler* wrapComp; + COMP_HANDLE wrapHnd; // the "real thing" + +public: +#include "ICorJitInfo_API_wrapper.hpp" +}; + +#endif // MEASURE_CLRAPI_CALLS /*****************************************************************************/ @@ -6078,7 +6321,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd, CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags, + JitFlags* compileFlags, void* inlineInfoPtr) { // @@ -6093,6 +6336,10 @@ START: ArenaAllocator* pAlloc = nullptr; ArenaAllocator alloc; +#if MEASURE_CLRAPI_CALLS + WrapICorJitInfo* wrapCLR = nullptr; +#endif + if (inlineInfo) { // Use inliner's memory allocator when compiling the inlinee. @@ -6128,8 +6375,11 @@ START: CORINFO_METHOD_INFO* methodInfo; void** methodCodePtr; ULONG* methodCodeSize; - CORJIT_FLAGS* compileFlags; + JitFlags* compileFlags; InlineInfo* inlineInfo; +#if MEASURE_CLRAPI_CALLS + WrapICorJitInfo* wrapCLR; +#endif int result; } param; @@ -6145,7 +6395,10 @@ START: param.methodCodeSize = methodCodeSize; param.compileFlags = compileFlags; param.inlineInfo = inlineInfo; - param.result = result; +#if MEASURE_CLRAPI_CALLS + param.wrapCLR = nullptr; +#endif + param.result = result; setErrorTrap(compHnd, Param*, pParamOuter, &param) { @@ -6172,6 +6425,10 @@ START: pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp))); } +#if MEASURE_CLRAPI_CALLS + pParam->wrapCLR = WrapICorJitInfo::makeOne(pParam->pAlloc, pParam->pComp, pParam->compHnd); +#endif + // push this compiler on the stack (TLS) pParam->pComp->prevCompiler = JitTls::GetCompiler(); JitTls::SetCompiler(pParam->pComp); @@ -6238,8 +6495,9 @@ START: jitFallbackCompile = true; // Update the flags for 'safer' code generation. 
- compileFlags->corJitFlags |= CORJIT_FLG_MIN_OPT; - compileFlags->corJitFlags &= ~(CORJIT_FLG_SIZE_OPT | CORJIT_FLG_SPEED_OPT); + compileFlags->Set(JitFlags::JIT_FLAG_MIN_OPT); + compileFlags->Clear(JitFlags::JIT_FLAG_SIZE_OPT); + compileFlags->Clear(JitFlags::JIT_FLAG_SPEED_OPT); goto START; } @@ -6952,9 +7210,12 @@ void Compiler::compDispCallArgStats(FILE* fout) // Static variables CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock; CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary; +#if MEASURE_CLRAPI_CALLS +double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond(); +#endif #endif // FEATURE_JIT_METHOD_PERF -#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS +#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING) const char* PhaseNames[] = { #define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm, #include "compphases.h" @@ -6983,13 +7244,36 @@ int PhaseParent[] = { }; CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes) - : m_byteCodeBytes(byteCodeBytes), m_totalCycles(0), m_parentPhaseEndSlop(0), m_timerFailure(false) + : m_byteCodeBytes(byteCodeBytes) + , m_totalCycles(0) + , m_parentPhaseEndSlop(0) + , m_timerFailure(false) +#if MEASURE_CLRAPI_CALLS + , m_allClrAPIcalls(0) + , m_allClrAPIcycles(0) +#endif { for (int i = 0; i < PHASE_NUMBER_OF; i++) { m_invokesByPhase[i] = 0; m_cyclesByPhase[i] = 0; +#if MEASURE_CLRAPI_CALLS + m_CLRinvokesByPhase[i] = 0; + m_CLRcyclesByPhase[i] = 0; +#endif } + +#if MEASURE_CLRAPI_CALLS + assert(ARRAYSIZE(m_perClrAPIcalls) == API_ICorJitInfo_Names::API_COUNT); + assert(ARRAYSIZE(m_perClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT); + assert(ARRAYSIZE(m_maxClrAPIcycles) == API_ICorJitInfo_Names::API_COUNT); + for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++) + { + m_perClrAPIcalls[i] = 0; + m_perClrAPIcycles[i] = 0; + m_maxClrAPIcycles[i] = 0; + } +#endif } bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info) @@ -6997,52 +7281,125 @@ bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info) return false; // info.m_byteCodeBytes < 10; } -void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info) +//------------------------------------------------------------------------ +// CompTimeSummaryInfo::AddInfo: Record timing info from one compile. +// +// Arguments: +// info - The timing information to record. +// includePhases - If "true", the per-phase info in "info" is valid, +// which means that a "normal" compile has ended; if +// the value is "false" we are recording the results +// of a partial compile (typically an import-only run +// on behalf of the inliner) in which case the phase +// info is not valid and so we only record EE call +// overhead. +void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info, bool includePhases) { if (info.m_timerFailure) + { return; // Don't update if there was a failure. + } CritSecHolder timeLock(s_compTimeSummaryLock); - m_numMethods++; - bool includeInFiltered = IncludedInFilteredData(info); + if (includePhases) + { + bool includeInFiltered = IncludedInFilteredData(info); - // Update the totals and maxima. 
- m_total.m_byteCodeBytes += info.m_byteCodeBytes; - m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes); - m_total.m_totalCycles += info.m_totalCycles; - m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles); + m_numMethods++; - if (includeInFiltered) - { - m_numFilteredMethods++; - m_filtered.m_byteCodeBytes += info.m_byteCodeBytes; - m_filtered.m_totalCycles += info.m_totalCycles; - m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop; - } + // Update the totals and maxima. + m_total.m_byteCodeBytes += info.m_byteCodeBytes; + m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes); + m_total.m_totalCycles += info.m_totalCycles; + m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles); + +#if MEASURE_CLRAPI_CALLS + // Update the CLR-API values. + m_total.m_allClrAPIcalls += info.m_allClrAPIcalls; + m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls); + m_total.m_allClrAPIcycles += info.m_allClrAPIcycles; + m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles); +#endif - for (int i = 0; i < PHASE_NUMBER_OF; i++) - { - m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i]; - m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i]; if (includeInFiltered) { - m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i]; - m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i]; + m_numFilteredMethods++; + m_filtered.m_byteCodeBytes += info.m_byteCodeBytes; + m_filtered.m_totalCycles += info.m_totalCycles; + m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop; + } + + for (int i = 0; i < PHASE_NUMBER_OF; i++) + { + m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i]; + m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i]; + +#if MEASURE_CLRAPI_CALLS + m_total.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i]; + m_total.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i]; +#endif + + if (includeInFiltered) + { + m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i]; + m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i]; +#if MEASURE_CLRAPI_CALLS + m_filtered.m_CLRinvokesByPhase[i] += info.m_CLRinvokesByPhase[i]; + m_filtered.m_CLRcyclesByPhase[i] += info.m_CLRcyclesByPhase[i]; +#endif + } + m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]); + +#if MEASURE_CLRAPI_CALLS + m_maximum.m_CLRcyclesByPhase[i] = max(m_maximum.m_CLRcyclesByPhase[i], info.m_CLRcyclesByPhase[i]); +#endif } - m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]); + m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop; + m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop); + } +#if MEASURE_CLRAPI_CALLS + else + { + m_totMethods++; + + // Update the "global" CLR-API values. + m_total.m_allClrAPIcalls += info.m_allClrAPIcalls; + m_maximum.m_allClrAPIcalls = max(m_maximum.m_allClrAPIcalls, info.m_allClrAPIcalls); + m_total.m_allClrAPIcycles += info.m_allClrAPIcycles; + m_maximum.m_allClrAPIcycles = max(m_maximum.m_allClrAPIcycles, info.m_allClrAPIcycles); + + // Update the per-phase CLR-API values. 
+ m_total.m_invokesByPhase[PHASE_CLR_API] += info.m_allClrAPIcalls; + m_maximum.m_invokesByPhase[PHASE_CLR_API] = + max(m_maximum.m_perClrAPIcalls[PHASE_CLR_API], info.m_allClrAPIcalls); + m_total.m_cyclesByPhase[PHASE_CLR_API] += info.m_allClrAPIcycles; + m_maximum.m_cyclesByPhase[PHASE_CLR_API] = + max(m_maximum.m_cyclesByPhase[PHASE_CLR_API], info.m_allClrAPIcycles); + } + + for (int i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++) + { + m_total.m_perClrAPIcalls[i] += info.m_perClrAPIcalls[i]; + m_maximum.m_perClrAPIcalls[i] = max(m_maximum.m_perClrAPIcalls[i], info.m_perClrAPIcalls[i]); + + m_total.m_perClrAPIcycles[i] += info.m_perClrAPIcycles[i]; + m_maximum.m_perClrAPIcycles[i] = max(m_maximum.m_perClrAPIcycles[i], info.m_perClrAPIcycles[i]); + + m_maximum.m_maxClrAPIcycles[i] = max(m_maximum.m_maxClrAPIcycles[i], info.m_maxClrAPIcycles[i]); } - m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop; - m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop); +#endif } // Static -LPCWSTR Compiler::compJitTimeLogFilename = NULL; +LPCWSTR Compiler::compJitTimeLogFilename = nullptr; void CompTimeSummaryInfo::Print(FILE* f) { - if (f == NULL) + if (f == nullptr) + { return; + } // Otherwise... double countsPerSec = CycleTimer::CyclesPerSecond(); if (countsPerSec == 0.0) @@ -7051,13 +7408,16 @@ void CompTimeSummaryInfo::Print(FILE* f) return; } + bool extraInfo = (JitConfig.JitEECallTimingInfo() != 0); + double totTime_ms = 0.0; + fprintf(f, "JIT Compilation time report:\n"); fprintf(f, " Compiled %d methods.\n", m_numMethods); if (m_numMethods != 0) { fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes, m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods); - double totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0; + totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0; fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0), totTime_ms); fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0, @@ -7065,15 +7425,36 @@ void CompTimeSummaryInfo::Print(FILE* f) fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods); - fprintf(f, " Total time by phases:\n"); - fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)\n"); - fprintf(f, " --------------------------------------------------------------------------------------\n"); + const char* extraHdr1 = ""; + const char* extraHdr2 = ""; +#if MEASURE_CLRAPI_CALLS + if (extraInfo) + { + extraHdr1 = " CLRs/meth % in CLR"; + extraHdr2 = "-----------------------"; + } +#endif + + fprintf(f, "\n Total time by phases:\n"); + fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)%s\n", + extraHdr1); + fprintf(f, " ---------------------------------------------------------------------------------------%s\n", + extraHdr2); + // Ensure that at least the names array and the Phases enum have the same number of entries: assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF); for (int i = 0; i < PHASE_NUMBER_OF; i++) { - double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0; - double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0; + double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0; + double phase_max_ms = 
(((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0; + double phase_tot_pct = 100.0 * phase_tot_ms / totTime_ms; + +#if MEASURE_CLRAPI_CALLS + // Skip showing CLR API call info if we didn't collect any + if (i == PHASE_CLR_API && !extraInfo) + continue; +#endif + // Indent nested phases, according to depth. int ancPhase = PhaseParent[i]; while (ancPhase != -1) @@ -7081,13 +7462,33 @@ void CompTimeSummaryInfo::Print(FILE* f) fprintf(f, " "); ancPhase = PhaseParent[ancPhase]; } - fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%% %8.3f\n", PhaseNames[i], + fprintf(f, " %-30s %6.2f %10.2f %9.3f %8.2f%% %8.3f", PhaseNames[i], ((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods), ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms), phase_max_ms); + +#if MEASURE_CLRAPI_CALLS + if (extraInfo && i != PHASE_CLR_API) + { + double nest_tot_ms = (((double)m_total.m_CLRcyclesByPhase[i]) / countsPerSec) * 1000.0; + double nest_percent = nest_tot_ms * 100.0 / totTime_ms; + double calls_per_fn = ((double)m_total.m_CLRinvokesByPhase[i]) / ((double)m_numMethods); + + if (nest_percent > 0.1 || calls_per_fn > 10) + fprintf(f, " %5.1f %8.2f%%", calls_per_fn, nest_percent); + } +#endif + fprintf(f, "\n"); + } + + // Show slop if it's over a certain percentage of the total + double pslop_pct = 100.0 * m_total.m_parentPhaseEndSlop * 1000.0 / countsPerSec / totTime_ms; + if (pslop_pct >= 1.0) + { + fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles = " + "%3.1f%% of total.\n\n", + m_total.m_parentPhaseEndSlop / 1000000.0, pslop_pct); } - fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n", - m_total.m_parentPhaseEndSlop); } if (m_numFilteredMethods > 0) { @@ -7121,19 +7522,125 @@ void CompTimeSummaryInfo::Print(FILE* f) ((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms)); } - fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n", - m_filtered.m_parentPhaseEndSlop); + + double fslop_ms = m_filtered.m_parentPhaseEndSlop * 1000.0 / countsPerSec; + if (fslop_ms > 1.0) + { + fprintf(f, + "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n", + m_filtered.m_parentPhaseEndSlop); + } } + +#if MEASURE_CLRAPI_CALLS + if (m_total.m_allClrAPIcalls > 0 && m_total.m_allClrAPIcycles > 0) + { + fprintf(f, "\n"); + if (m_totMethods > 0) + fprintf(f, " Imported %u methods.\n\n", m_numMethods + m_totMethods); + + fprintf(f, " CLR API # calls total time max time avg time %% " + "of total\n"); + fprintf(f, " -------------------------------------------------------------------------------"); + fprintf(f, "---------------------\n"); + + static const char* APInames[] = { +#define DEF_CLR_API(name) #name, +#include "ICorJitInfo_API_names.h" + }; + + unsigned shownCalls = 0; + double shownMillis = 0.0; +#ifdef DEBUG + unsigned checkedCalls = 0; + double checkedMillis = 0.0; +#endif + + for (unsigned pass = 0; pass < 2; pass++) + { + for (unsigned i = 0; i < API_ICorJitInfo_Names::API_COUNT; i++) + { + unsigned calls = m_total.m_perClrAPIcalls[i]; + if (calls == 0) + continue; + + unsigned __int64 cycles = m_total.m_perClrAPIcycles[i]; + double millis = 1000.0 * cycles / countsPerSec; + + // Don't show the small fry to keep the results manageable + if (millis < 0.5) + { + // We always show the following API because it is 
always called + // exactly once for each method and its body is the simplest one + // possible (it just returns an integer constant), and therefore + // it can be used to measure the overhead of adding the CLR API + // timing code. Roughly speaking, on a 3GHz x64 box the overhead + // per call should be around 40 ns when using RDTSC, compared to + // about 140 ns when using GetThreadCycles() under Windows. + if (i != API_ICorJitInfo_Names::API_getExpectedTargetArchitecture) + continue; + } + + // In the first pass we just compute the totals. + if (pass == 0) + { + shownCalls += m_total.m_perClrAPIcalls[i]; + shownMillis += millis; + continue; + } + + unsigned __int32 maxcyc = m_maximum.m_maxClrAPIcycles[i]; + double max_ms = 1000.0 * maxcyc / countsPerSec; + + fprintf(f, " %-40s", APInames[i]); // API name + fprintf(f, " %8u %9.1f ms", calls, millis); // #calls, total time + fprintf(f, " %8.1f ms %8.1f ns", max_ms, 1000000.0 * millis / calls); // max, avg time + fprintf(f, " %5.1f%%\n", 100.0 * millis / shownMillis); // % of total + +#ifdef DEBUG + checkedCalls += m_total.m_perClrAPIcalls[i]; + checkedMillis += millis; +#endif + } + } + +#ifdef DEBUG + assert(checkedCalls == shownCalls); + assert(checkedMillis == shownMillis); +#endif + + if (shownCalls > 0 || shownMillis > 0) + { + fprintf(f, " -------------------------"); + fprintf(f, "---------------------------------------------------------------------------\n"); + fprintf(f, " Total for calls shown above %8u %10.1f ms", shownCalls, shownMillis); + if (totTime_ms > 0.0) + fprintf(f, " (%4.1lf%% of overall JIT time)", shownMillis * 100.0 / totTime_ms); + fprintf(f, "\n"); + } + fprintf(f, "\n"); + } +#endif + + fprintf(f, "\n"); } JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize) { +#if MEASURE_CLRAPI_CALLS + m_CLRcallInvokes = 0; + m_CLRcallCycles = 0; +#endif + #ifdef DEBUG m_lastPhase = (Phases)-1; +#if MEASURE_CLRAPI_CALLS + m_CLRcallAPInum = -1; +#endif #endif unsigned __int64 threadCurCycles; - if (GetThreadCycles(&threadCurCycles)) + if (_our_GetThreadCycles(&threadCurCycles)) { m_start = threadCurCycles; m_curPhaseStart = threadCurCycles; @@ -7147,9 +7654,10 @@ void JitTimer::EndPhase(Phases phase) // assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order. unsigned __int64 threadCurCycles; - if (GetThreadCycles(&threadCurCycles)) + if (_our_GetThreadCycles(&threadCurCycles)) { unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart); + // If this is not a leaf phase, the assumption is that the last subphase must have just recently ended. // Credit the duration to "slop", the total of which should be very small. if (PhaseHasChildren[phase]) @@ -7161,6 +7669,13 @@ void JitTimer::EndPhase(Phases phase) // It is a leaf phase. Credit duration to it. m_info.m_invokesByPhase[phase]++; m_info.m_cyclesByPhase[phase] += phaseCycles; + +#if MEASURE_CLRAPI_CALLS + // Record the CLR API timing info as well. + m_info.m_CLRinvokesByPhase[phase] += m_CLRcallInvokes; + m_info.m_CLRcyclesByPhase[phase] += m_CLRcallCycles; +#endif + // Credit the phase's ancestors, if any. int ancPhase = PhaseParent[phase]; while (ancPhase != -1) @@ -7168,8 +7683,13 @@ void JitTimer::EndPhase(Phases phase) m_info.m_cyclesByPhase[ancPhase] += phaseCycles; ancPhase = PhaseParent[ancPhase]; } - // Did we just end the last phase? 
- if (phase + 1 == PHASE_NUMBER_OF) + +#if MEASURE_CLRAPI_CALLS + const Phases lastPhase = PHASE_CLR_API; +#else + const Phases lastPhase = PHASE_NUMBER_OF; +#endif + if (phase + 1 == lastPhase) { m_info.m_totalCycles = (threadCurCycles - m_start); } @@ -7179,11 +7699,92 @@ void JitTimer::EndPhase(Phases phase) } } } + #ifdef DEBUG m_lastPhase = phase; #endif +#if MEASURE_CLRAPI_CALLS + m_CLRcallInvokes = 0; + m_CLRcallCycles = 0; +#endif +} + +#if MEASURE_CLRAPI_CALLS + +//------------------------------------------------------------------------ +// JitTimer::CLRApiCallEnter: Start the stopwatch for an EE call. +// +// Arguments: +// apix - The API index - an "enum API_ICorJitInfo_Names" value. +// + +void JitTimer::CLRApiCallEnter(unsigned apix) +{ + assert(m_CLRcallAPInum == -1); // Nested calls not allowed + m_CLRcallAPInum = apix; + + // If we can't get the cycles, we'll just ignore this call + if (!_our_GetThreadCycles(&m_CLRcallStart)) + m_CLRcallStart = 0; +} + +//------------------------------------------------------------------------ +// JitTimer::CLRApiCallLeave: compute / record time spent in an EE call. +// +// Arguments: +// apix - The API's "enum API_ICorJitInfo_Names" value; this value +// should match the value passed to the most recent call to +// "CLRApiCallEnter" (i.e. these must come as matched pairs), +// and they also may not nest. +// + +void JitTimer::CLRApiCallLeave(unsigned apix) +{ + // Make sure we're actually inside a measured CLR call. + assert(m_CLRcallAPInum != -1); + m_CLRcallAPInum = -1; + + // Ignore this one if we don't have a valid starting counter. + if (m_CLRcallStart != 0) + { + if (JitConfig.JitEECallTimingInfo() != 0) + { + unsigned __int64 threadCurCycles; + if (_our_GetThreadCycles(&threadCurCycles)) + { + // Compute the cycles spent in the call. + threadCurCycles -= m_CLRcallStart; + + // Add the cycles to the 'phase' and bump its use count. + m_info.m_cyclesByPhase[PHASE_CLR_API] += threadCurCycles; + m_info.m_invokesByPhase[PHASE_CLR_API] += 1; + + // Add the values to the "per API" info. + m_info.m_allClrAPIcycles += threadCurCycles; + m_info.m_allClrAPIcalls += 1; + + m_info.m_perClrAPIcalls[apix] += 1; + m_info.m_perClrAPIcycles[apix] += threadCurCycles; + m_info.m_maxClrAPIcycles[apix] = max(m_info.m_maxClrAPIcycles[apix], (unsigned __int32)threadCurCycles); + + // Subtract the cycles from the enclosing phase by bumping its start time + m_curPhaseStart += threadCurCycles; + + // Update the running totals. + m_CLRcallInvokes += 1; + m_CLRcallCycles += threadCurCycles; + } + } + + m_CLRcallStart = 0; + } + + assert(m_CLRcallAPInum != -1); // No longer in this API call. + m_CLRcallAPInum = -1; } +#endif // MEASURE_CLRAPI_CALLS + CritSecObject JitTimer::s_csvLock; LPCWSTR Compiler::JitTimeLogCsv() @@ -7195,39 +7796,38 @@ LPCWSTR Compiler::JitTimeLogCsv() void JitTimer::PrintCsvHeader() { LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv(); - if (jitTimeLogCsv == NULL) + if (jitTimeLogCsv == nullptr) { return; } CritSecHolder csvLock(s_csvLock); - FILE* fp = _wfopen(jitTimeLogCsv, W("r")); - if (fp == nullptr) + FILE* fp = _wfopen(jitTimeLogCsv, W("a")); + if (fp != nullptr) { - // File doesn't exist, so create it and write the header - - // Use write mode, so we rewrite the file, and retain only the last compiled process/dll. 
- // Ex: ngen install mscorlib won't print stats for "ngen" but for "mscorsvw" - FILE* fp = _wfopen(jitTimeLogCsv, W("w")); - fprintf(fp, "\"Method Name\","); - fprintf(fp, "\"Method Index\","); - fprintf(fp, "\"IL Bytes\","); - fprintf(fp, "\"Basic Blocks\","); - fprintf(fp, "\"Opt Level\","); - fprintf(fp, "\"Loops Cloned\","); - - for (int i = 0; i < PHASE_NUMBER_OF; i++) + // Write the header if the file is empty + if (ftell(fp) == 0) { - fprintf(fp, "\"%s\",", PhaseNames[i]); - } + fprintf(fp, "\"Method Name\","); + fprintf(fp, "\"Method Index\","); + fprintf(fp, "\"IL Bytes\","); + fprintf(fp, "\"Basic Blocks\","); + fprintf(fp, "\"Opt Level\","); + fprintf(fp, "\"Loops Cloned\","); - InlineStrategy::DumpCsvHeader(fp); + for (int i = 0; i < PHASE_NUMBER_OF; i++) + { + fprintf(fp, "\"%s\",", PhaseNames[i]); + } - fprintf(fp, "\"Total Cycles\","); - fprintf(fp, "\"CPS\"\n"); + InlineStrategy::DumpCsvHeader(fp); + + fprintf(fp, "\"Total Cycles\","); + fprintf(fp, "\"CPS\"\n"); + } + fclose(fp); } - fclose(fp); } extern ICorJitHost* g_jitHost; @@ -7235,7 +7835,7 @@ extern ICorJitHost* g_jitHost; void JitTimer::PrintCsvMethodStats(Compiler* comp) { LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv(); - if (jitTimeLogCsv == NULL) + if (jitTimeLogCsv == nullptr) { return; } @@ -7265,7 +7865,9 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp) for (int i = 0; i < PHASE_NUMBER_OF; i++) { if (!PhaseHasChildren[i]) + { totCycles += m_info.m_cyclesByPhase[i]; + } fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]); } @@ -7277,23 +7879,14 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp) } // Completes the timing of the current method, and adds it to "sum". -void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum) +void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases) { -#ifdef DEBUG - unsigned __int64 totCycles2 = 0; - for (int i = 0; i < PHASE_NUMBER_OF; i++) + if (includePhases) { - if (!PhaseHasChildren[i]) - totCycles2 += m_info.m_cyclesByPhase[i]; + PrintCsvMethodStats(comp); } - // We include m_parentPhaseEndSlop in the next phase's time also (we probably shouldn't) - // totCycles2 += m_info.m_parentPhaseEndSlop; - assert(totCycles2 == m_info.m_totalCycles); -#endif - - PrintCsvMethodStats(comp); - sum.AddInfo(m_info); + sum.AddInfo(m_info, includePhases); } #endif // FEATURE_JIT_METHOD_PERF @@ -7331,6 +7924,10 @@ void Compiler::MemStats::PrintByKind(FILE* f) void Compiler::AggregateMemStats::Print(FILE* f) { fprintf(f, "For %9u methods:\n", nMethods); + if (nMethods == 0) + { + return; + } fprintf(f, " count: %12u (avg %7u per method)\n", allocCnt, allocCnt / nMethods); fprintf(f, " alloc size : %12llu (avg %7llu per method)\n", allocSz, allocSz / nMethods); fprintf(f, " max alloc : %12llu\n", allocSzMax); @@ -8520,6 +9117,9 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree) break; case GT_MUL: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif if (tree->gtFlags & GTF_MUL_64RSLT) { @@ -10124,11 +10724,6 @@ void cNodeIR(Compiler* comp, GenTree* tree) } break; - case GT_STORE_CLS_VAR: - - chars += printf(" ???"); - break; - case GT_LEA: GenTreeAddrMode* lea = tree->AsAddrMode(); diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 05047c5ecb..d8cd491063 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -691,12 +691,21 @@ public: // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree. 
assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct)); + +#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_) + // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do + // this for arguments, which must be passed according the defined ABI. + if ((lvType == TYP_SIMD12) && !lvIsParam) + { + assert(lvExactSize == 12); + return 16; + } +#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_) + return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE)); } -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) unsigned lvSlotNum; // original slot # (if remapped) -#endif typeInfo lvVerTypeInfo; // type info needed for verification @@ -926,6 +935,14 @@ extern const char* PhaseNames[]; extern const char* PhaseEnums[]; extern const LPCWSTR PhaseShortNames[]; +// The following enum provides a simple 1:1 mapping to CLR API's +enum API_ICorJitInfo_Names +{ +#define DEF_CLR_API(name) API_##name, +#include "ICorJitInfo_API_names.h" + API_COUNT +}; + //--------------------------------------------------------------- // Compilation time. // @@ -949,6 +966,10 @@ struct CompTimeInfo unsigned __int64 m_totalCycles; unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF]; unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF]; +#if MEASURE_CLRAPI_CALLS + unsigned __int64 m_CLRinvokesByPhase[PHASE_NUMBER_OF]; + unsigned __int64 m_CLRcyclesByPhase[PHASE_NUMBER_OF]; +#endif // For better documentation, we call EndPhase on // non-leaf phases. We should also call EndPhase on the // last leaf subphase; obviously, the elapsed cycles between the EndPhase @@ -960,12 +981,25 @@ struct CompTimeInfo unsigned __int64 m_parentPhaseEndSlop; bool m_timerFailure; +#if MEASURE_CLRAPI_CALLS + // The following measures the time spent inside each individual CLR API call. + unsigned m_allClrAPIcalls; + unsigned m_perClrAPIcalls[API_ICorJitInfo_Names::API_COUNT]; + unsigned __int64 m_allClrAPIcycles; + unsigned __int64 m_perClrAPIcycles[API_ICorJitInfo_Names::API_COUNT]; + unsigned __int32 m_maxClrAPIcycles[API_ICorJitInfo_Names::API_COUNT]; +#endif // MEASURE_CLRAPI_CALLS + CompTimeInfo(unsigned byteCodeBytes); #endif }; #ifdef FEATURE_JIT_METHOD_PERF +#if MEASURE_CLRAPI_CALLS +struct WrapICorJitInfo; +#endif + // This class summarizes the JIT time information over the course of a run: the number of methods compiled, // and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above). // The operation of adding a single method's timing to the summary may be performed concurrently by several @@ -977,6 +1011,7 @@ class CompTimeSummaryInfo static CritSecObject s_compTimeSummaryLock; int m_numMethods; + int m_totMethods; CompTimeInfo m_total; CompTimeInfo m_maximum; @@ -996,13 +1031,14 @@ public: // This is the unique CompTimeSummaryInfo object for this instance of the runtime. static CompTimeSummaryInfo s_compTimeSummary; - CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0) + CompTimeSummaryInfo() + : m_numMethods(0), m_totMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0) { } // Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary. // This is thread safe. - void AddInfo(CompTimeInfo& info); + void AddInfo(CompTimeInfo& info, bool includePhases); // Print the summary information to "f". // This is not thread-safe; assumed to be called by only one thread. 
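For orientation, here is a minimal, self-contained sketch (simplified stand-in types and illustrative values; not the CoreCLR definitions) of the contract behind the new includePhases parameter that JitTimer::Terminate now forwards to CompTimeSummaryInfo::AddInfo: a completed compile contributes its per-phase totals, while a partial (import-only/inlinee) compile contributes only its CLR-API call overhead.

// Illustration only: simplified stand-in types, not the CoreCLR implementation.
#include <cstdio>

struct CompTimeInfo
{
    unsigned long long totalCycles;  // stands in for m_totalCycles
    unsigned long long clrApiCycles; // stands in for m_allClrAPIcycles
};

struct CompTimeSummaryInfo
{
    int                numMethods;        // full compiles: per-phase data valid
    int                totMethods;        // partial compiles: only EE-call overhead valid
    unsigned long long totalCycles;
    unsigned long long totalClrApiCycles;

    CompTimeSummaryInfo() : numMethods(0), totMethods(0), totalCycles(0), totalClrApiCycles(0)
    {
    }

    // Mirrors the new AddInfo(CompTimeInfo&, bool includePhases) contract: phase totals are
    // merged only for a completed compile; EE-call overhead is recorded either way.
    void AddInfo(const CompTimeInfo& info, bool includePhases)
    {
        if (includePhases)
        {
            numMethods++;
            totalCycles += info.totalCycles;
        }
        else
        {
            totMethods++;
        }
        totalClrApiCycles += info.clrApiCycles;
    }
};

int main()
{
    CompTimeSummaryInfo summary;

    CompTimeInfo fullCompile = {500000, 20000}; // Terminate(..., includePhases = true)
    CompTimeInfo importOnly  = {0, 5000};       // Terminate(..., includePhases = false)

    summary.AddInfo(fullCompile, true);
    summary.AddInfo(importOnly, false);

    printf("full=%d partial=%d cycles=%llu eeCycles=%llu\n", summary.numMethods, summary.totMethods,
           summary.totalCycles, summary.totalClrApiCycles);
    return 0;
}

In the real AddInfo above, the non-includePhases path additionally folds the EE-call time into the PHASE_CLR_API counters; the sketch omits per-phase arrays for brevity.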
@@ -1017,6 +1053,13 @@ class JitTimer { unsigned __int64 m_start; // Start of the compilation. unsigned __int64 m_curPhaseStart; // Start of the current phase. +#if MEASURE_CLRAPI_CALLS + unsigned __int64 m_CLRcallStart; // Start of the current CLR API call (if any). + unsigned __int64 m_CLRcallInvokes; // CLR API invokes under current outer so far + unsigned __int64 m_CLRcallCycles; // CLR API cycles under current outer so far. + int m_CLRcallAPInum; // The enum/index of the current CLR API call (or -1). + static double s_cyclesPerSec; // Cached for speedier measurements +#endif #ifdef DEBUG Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start). #endif @@ -1045,9 +1088,15 @@ public: // Ends the current phase (argument is for a redundant check). void EndPhase(Phases phase); +#if MEASURE_CLRAPI_CALLS + // Start and end a timed CLR API call. + void CLRApiCallEnter(unsigned apix); + void CLRApiCallLeave(unsigned apix); +#endif // MEASURE_CLRAPI_CALLS + // Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode, // and adds it to "sum". - void Terminate(Compiler* comp, CompTimeSummaryInfo& sum); + void Terminate(Compiler* comp, CompTimeSummaryInfo& sum, bool includePhases); // Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets // *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of @@ -1164,7 +1213,13 @@ struct fgArgTabEntry regNumber otherRegNum; // The (second) register to use when passing this argument. SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; -#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#elif defined(_TARGET_X86_) + __declspec(property(get = getIsStruct)) bool isStruct; + bool getIsStruct() + { + return varTypeIsStruct(node); + } +#endif // _TARGET_X86_ #ifdef _TARGET_ARM_ void SetIsHfaRegArg(bool hfaRegArg) @@ -1293,6 +1348,10 @@ public: { return hasStackArgs; } + bool AreArgsComplete() const + { + return argsComplete; + } }; #ifdef DEBUG @@ -1939,8 +1998,6 @@ public: GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2); GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3); - GenTreeArgList* gtNewAggregate(GenTree* element); - static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum); static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node); fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx); @@ -1975,7 +2032,18 @@ public: GenTreePtr gtClone(GenTree* tree, bool complexOK = false); - GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0); + // If `tree` is a lclVar with lclNum `varNum`, return an IntCns with value `varVal`; otherwise, + // create a copy of `tree`, adding specified flags, replacing uses of lclVar `deepVarNum` with + // IntCnses with value `deepVarVal`. + GenTreePtr gtCloneExpr( + GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal); + + // Create a copy of `tree`, optionally adding specifed flags, and optionally mapping uses of local + // `varNum` to int constants with value `varVal`. 
+ GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0) + { + return gtCloneExpr(tree, addFlags, varNum, varVal, varNum, varVal); + } GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree); @@ -1997,7 +2065,7 @@ public: unsigned gtHashValue(GenTree* tree); - unsigned gtSetListOrder(GenTree* list, bool regs); + unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs); void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly); @@ -2277,7 +2345,8 @@ public: DNER_VMNeedsStackAddr, DNER_LiveInOutOfHandler, DNER_LiveAcrossUnmanagedCall, - DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address. + DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address. + DNER_IsStructArg, // Is a struct passed as an argument in a way that requires a stack location. #ifdef JIT32_GCENCODER DNER_PinningRef, #endif @@ -2439,7 +2508,6 @@ public: void lvaInit(); - unsigned lvaArgSize(const void* argTok); unsigned lvaLclSize(unsigned varNum); unsigned lvaLclExactSize(unsigned varNum); @@ -2712,9 +2780,10 @@ protected: void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo); - bool impCanPInvokeInline(var_types callRetTyp); - bool impCanPInvokeInlineCallSite(var_types callRetTyp); - void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags); + bool impCanPInvokeInline(BasicBlock* block); + bool impCanPInvokeInlineCallSite(BasicBlock* block); + void impCheckForPInvokeCall( + GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block); GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET); void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig); @@ -2739,8 +2808,6 @@ protected: GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd); - GenTreePtr impInitCallLongReturn(GenTreePtr call); - GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd); #ifdef DEBUG @@ -2764,7 +2831,6 @@ protected: void impImportLeave(BasicBlock* block); void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr); - BOOL impLocAllocOnStack(); GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, @@ -2868,6 +2934,8 @@ public: unsigned flags, void* compileTimeHandle); + GenTreePtr getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind); + GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP* pLookup, void* compileTimeHandle); @@ -3148,8 +3216,6 @@ private: static LONG jitNestingLevel; #endif // DEBUG - bool seenConditionalJump; - static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut); void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult); @@ -3455,8 +3521,9 @@ public: void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw); void fgMorphBlocks(); - bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg)); + bool fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg)); + void fgCheckArgCnt(); void fgSetOptions(); #ifdef DEBUG @@ -3845,7 +3912,7 @@ public: // var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, structPassingKind* wbPassStruct = nullptr, - unsigned structSize = 0); + 
unsigned structSize = 0); #ifdef DEBUG // Print a representation of "vnp" or "vn" on standard output. @@ -4072,7 +4139,7 @@ public: void fgUnreachableBlock(BasicBlock* block); - void fgRemoveJTrue(BasicBlock* block); + void fgRemoveConditionalJump(BasicBlock* block); BasicBlock* fgLastBBInMainFunction(); @@ -4204,6 +4271,7 @@ public: void fgDebugCheckLinks(bool morphTrees = false); void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt); void fgDebugCheckFlags(GenTreePtr tree); + void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags); #endif #ifdef LEGACY_BACKEND @@ -4305,7 +4373,7 @@ protected: void fgLinkBasicBlocks(); - void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget); + unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget); void fgCheckBasicBlockControlFlow(); @@ -4380,13 +4448,6 @@ private: GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr); GenTree* fgMakeMultiUse(GenTree** ppTree); - // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr - // if it happens to be an argument to a call. - void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild); - -public: - void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg); - private: // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node. GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree); @@ -4440,16 +4501,11 @@ private: // for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we // know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that // all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently - // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second - // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that - // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes - // representing method call argument lists.) + // small; hence the other fields of MorphAddrContext. enum MorphAddrContextKind { MACK_Ind, MACK_Addr, - MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the - // addresses in the arg list. 
}; struct MorphAddrContext { @@ -4513,7 +4569,7 @@ private: void fgMorphCallInline(GenTreeCall* call, InlineResult* result); void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result); #if DEBUG - void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call); + void fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call); static fgWalkPreFn fgFindNonInlineCandidate; #endif GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd); @@ -4525,16 +4581,14 @@ private: GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false); GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest); GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest); + void fgMorphUnsafeBlk(GenTreeObj* obj); GenTreePtr fgMorphCopyBlock(GenTreePtr tree); GenTreePtr fgMorphForRegisterFP(GenTreePtr tree); GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr); GenTreePtr fgMorphSmpOpPre(GenTreePtr tree); - GenTreePtr fgMorphDivByConst(GenTreeOp* tree); - GenTreePtr fgMorphModByConst(GenTreeOp* tree); GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree); GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree); GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare); - bool fgShouldUseMagicNumberDivide(GenTreeOp* tree); GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree); GenTreePtr fgMorphConst(GenTreePtr tree); @@ -4544,11 +4598,12 @@ public: private: #if LOCAL_ASSERTION_PROP + void fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree)); void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree)); #endif void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0)); - GenTreePtr fgMorphStmt; + GenTreeStmt* fgMorphStmt; unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be // used when morphing big offset. 
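The MorphAddrContext machinery above (now reduced to just MACK_Ind and MACK_Addr) exists to decide when an indirection can rely on OS page protection for its null check instead of an explicit test, which is only sound while every constant offset between the top-level indirection and the base stays small. A standalone sketch of that decision, with an illustrative helper name; in the JIT the actual limit is compMaxUncheckedOffsetForNullObject, declared further down in this header.

#include <cstddef>

// Illustrative only: an indirection [base + totalConstOffset] may skip an
// explicit null check when a null base would still fault in the guard region
// at address zero, i.e. when the summed constant offset stays below the limit.
static bool NeedsExplicitNullCheck(size_t totalConstOffset, size_t maxUncheckedOffset)
{
    return totalConstOffset > maxUncheckedOffset;
}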
@@ -4564,7 +4619,6 @@ private: void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr); -#ifdef DEBUGGING_SUPPORT void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var); void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var); @@ -4578,8 +4632,6 @@ private: void fgDispDebugScopes(); #endif // DEBUG -#endif // DEBUGGING_SUPPORT - //------------------------------------------------------------------------- // // The following keeps track of any code we've added for things like array @@ -4622,6 +4674,7 @@ private: void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result); void fgInsertInlineeBlocks(InlineInfo* pInlineInfo); GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo); + void fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmt); #if FEATURE_MULTIREG_RET GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree); @@ -4905,6 +4958,7 @@ public: #define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit) #define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit) #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit) +#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector<T>.Count (found in lpConstLimit) #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop #define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away) @@ -5205,6 +5259,11 @@ protected: static const int MIN_CSE_COST = 2; + // Keeps tracked cse indices + BitVecTraits* cseTraits; + EXPSET_TP cseFull; + EXPSET_TP cseEmpty; + /* Generic list of nodes - used by the CSE logic */ struct treeLst @@ -6237,7 +6296,7 @@ public: BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd); -#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) +#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(TRACK_LSRA_STATS) bool IsSuperPMIException(unsigned code) { @@ -6334,10 +6393,19 @@ public: #endif } + inline bool IsTargetAbi(CORINFO_RUNTIME_ABI abi) + { +#if COR_JIT_EE_VERSION > 460 + return eeGetEEInfo()->targetAbi == abi; +#else + return CORINFO_DESKTOP_ABI == abi; +#endif + } + inline bool generateCFIUnwindCodes() { -#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI) - return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI; +#ifdef UNIX_AMD64_ABI + return IsTargetAbi(CORINFO_CORERT_ABI); #else return false; #endif @@ -6522,8 +6590,6 @@ private: public: CodeGenInterface* codeGen; -#ifdef DEBUGGING_SUPPORT - // The following holds information about instr offsets in terms of generated code. struct IPmappingDsc @@ -6553,7 +6619,6 @@ public: typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior> CallSiteILOffsetTable; CallSiteILOffsetTable* genCallSite2ILOffsetMap; -#endif // DEBUGGING_SUPPORT unsigned genReturnLocal; // Local number for the return value when applicable. BasicBlock* genReturnBB; // jumped to when not optimizing for speed. 
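The cseTraits / cseFull / cseEmpty fields added above go together with a change later in this diff: genCSEnum2bit now returns a zero-based bit position instead of a one-bit mask, so EXPSET_TP can be a BitVecTraits-based bit vector rather than a fixed-width word. A small sketch of the index mapping, using a plain 64-bit word in place of the bit vector:

#include <cassert>
#include <cstdint>

// gtCSEnum is 0 for "not a CSE", otherwise a 1-based CSE index (negated on defs).
static unsigned CseIndexToBit(unsigned index)
{
    assert(index > 0);
    return index - 1; // the mapping genCSEnum2bit now returns
}

int main()
{
    uint64_t available = 0;                       // stand-in for an EXPSET_TP bit vector
    available |= uint64_t(1) << CseIndexToBit(3); // mark CSE #3 as available
    bool hasCse3 = ((available >> CseIndexToBit(3)) & 1) != 0;
    return hasCse3 ? 0 : 1;
}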
@@ -6588,8 +6653,14 @@ public: { return codeGen->doDoubleAlign(); } - DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc -#endif // DOUBLE_ALIGN + DWORD getCanDoubleAlign(); + bool shouldDoubleAlign(unsigned refCntStk, + unsigned refCntReg, + unsigned refCntWtdReg, + unsigned refCntStkParam, + unsigned refCntWtdStkDbl); +#endif // DOUBLE_ALIGN + __declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap; bool getFullPtrRegMap() { @@ -6829,6 +6900,11 @@ private: return InstructionSet_AVX; } + if (CanUseSSE3_4()) + { + return InstructionSet_SSE3_4; + } + // min bar is SSE2 assert(canUseSSE2()); return InstructionSet_SSE2; @@ -7072,7 +7148,7 @@ private: // and small int base type vectors. SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual( CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2); -#endif // defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND) +#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND) void setLclRelatedToSIMDIntrinsic(GenTreePtr tree); bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2); @@ -7261,6 +7337,16 @@ private: // Returns true if the TYP_SIMD locals on stack are aligned at their // preferred byte boundary specified by getSIMDTypeAlignment(). + // + // As per the Intel manual, the preferred alignment for AVX vectors is 32-bytes. On Amd64, + // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even + // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in + // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX + // 256-bit vectors with unaligned load/stores to/from memory. On x86, the stack frame + // is aligned to 4 bytes. We need to extend existing support for double (8-byte) alignment + // to 16 or 32 byte alignment for frames with local SIMD vars, if that is determined to be + // profitable. + // bool isSIMDTypeLocalAligned(unsigned varNum) { #if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES @@ -7270,8 +7356,7 @@ private: int off = lvaFrameAddress(varNum, &ebpBased); // TODO-Cleanup: Can't this use the lvExactSize on the varDsc? 
int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType); - bool isAligned = ((off % alignment) == 0); - noway_assert(isAligned || lvaTable[varNum].lvIsParam); + bool isAligned = (alignment <= STACK_ALIGN) && ((off % alignment) == 0); return isAligned; } #endif // FEATURE_SIMD @@ -7289,6 +7374,16 @@ private: #endif } + // Whether SSE3, SSE3, SSE4.1 and SSE4.2 is available + bool CanUseSSE3_4() const + { +#ifdef _TARGET_XARCH_ + return opts.compCanUseSSE3_4; +#else + return false; +#endif + } + bool canUseAVX() const { #ifdef FEATURE_AVX_SUPPORT @@ -7393,21 +7488,21 @@ public: struct Options { - CORJIT_FLAGS* jitFlags; // all flags passed from the EE - unsigned eeFlags; // CorJitFlag flags passed from the EE - unsigned compFlags; // method attributes + JitFlags* jitFlags; // all flags passed from the EE + unsigned compFlags; // method attributes codeOptimize compCodeOpt; // what type of code optimizations bool compUseFCOMI; bool compUseCMOV; #ifdef _TARGET_XARCH_ - bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions + bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions + bool compCanUseSSE3_4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions #ifdef FEATURE_AVX_SUPPORT bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations -#endif -#endif +#endif // FEATURE_AVX_SUPPORT +#endif // _TARGET_XARCH_ // optimize maximally and/or favor speed over size? @@ -7464,7 +7559,7 @@ public: #ifdef FEATURE_READYTORUN_COMPILER inline bool IsReadyToRun() { - return (eeFlags & CORJIT_FLG_READYTORUN) != 0; + return jitFlags->IsSet(JitFlags::JIT_FLAG_READYTORUN); } #else inline bool IsReadyToRun() @@ -7478,7 +7573,7 @@ public: inline bool ShouldUsePInvokeHelpers() { #if COR_JIT_EE_VERSION > 460 - return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0; + return jitFlags->IsSet(JitFlags::JIT_FLAG_USE_PINVOKE_HELPERS); #else return false; #endif @@ -7489,7 +7584,7 @@ public: inline bool IsReversePInvoke() { #if COR_JIT_EE_VERSION > 460 - return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0; + return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE); #else return false; #endif @@ -7499,7 +7594,7 @@ public: inline bool IsJit32Compat() { #if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460 - return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0; + return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS); #else return false; #endif @@ -7509,7 +7604,7 @@ public: inline bool IsJit64Compat() { #if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460 - return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0; + return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS); #elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR) return true; #else @@ -7517,14 +7612,10 @@ public: #endif } -#ifdef DEBUGGING_SUPPORT bool compScopeInfo; // Generate the LocalVar info ? bool compDbgCode; // Generate debugger-friendly code? bool compDbgInfo; // Gather debugging info? bool compDbgEnC; -#else - static const bool compDbgCode; -#endif #ifdef PROFILING_SUPPORTED bool compNoPInvokeInlineCB; @@ -7584,6 +7675,7 @@ public: bool altJit; // True if we are an altjit and are compiling this method #ifdef DEBUG + bool optRepeat; // Repeat optimizer phases k times bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH bool dspCode; // Display native code generated bool dspEHTable; // Display the EH table reported to the VM @@ -7623,9 +7715,11 @@ public: // for any call. 
We have a plan for not needing for stubs though bool compNeedStackProbes; - // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub()) - // This options helps one to make JIT behave as if it is under profiler. +#ifdef PROFILING_SUPPORTED + // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub()). + // This option helps make the JIT behave as if it is running under a profiler. bool compJitELTHookEnabled; +#endif // PROFILING_SUPPORTED #if FEATURE_TAILCALL_OPT // Whether opportunistic or implicit tail call optimization is enabled. @@ -7650,8 +7744,6 @@ public: #ifdef DEBUG - static bool s_dspMemStats; // Display per-phase memory statistics for every function - template <typename T> T dspPtr(T p) { @@ -7759,8 +7851,8 @@ public: codeOptimize compCodeOpt() { #if 0 - // Switching between size & speed has measurable throughput impact - // (3.5% on NGen mscorlib when measured). It used to be enabled for + // Switching between size & speed has measurable throughput impact + // (3.5% on NGen mscorlib when measured). It used to be enabled for // DEBUG, but should generate identical code between CHK & RET builds, // so that's not acceptable. // TODO-Throughput: Figure out what to do about size vs. speed & throughput. @@ -7772,10 +7864,6 @@ public: #endif } -#ifdef DEBUG - CLRRandom* inlRNG; -#endif - //--------------------- Info about the procedure -------------------------- struct Info @@ -7855,8 +7943,6 @@ public: // and the VM expects that, or the JIT is a "self-host" compiler // (e.g., x86 hosted targeting x86) and the VM expects that. -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) - /* The following holds IL scope information about local variables. */ @@ -7871,8 +7957,6 @@ public: unsigned compStmtOffsetsCount; ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit; -#endif // DEBUGGING_SUPPORT || DEBUG - #define CPU_X86 0x0100 // The generic X86 CPU #define CPU_X86_PENTIUM_4 0x0110 @@ -7937,9 +8021,12 @@ public: // Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM); #endif // TARGET_XXX + #else // not FEATURE_MULTIREG_RET + // For this architecture there are no multireg returns return false; + #endif // FEATURE_MULTIREG_RET } @@ -7960,7 +8047,7 @@ public: void compDispLocalVars(); -#endif // DEBUGGING_SUPPORT || DEBUG +#endif // DEBUG //-------------------------- Global Compiler Data ------------------------------------ @@ -8059,19 +8146,22 @@ public: CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags); + JitFlags* compileFlags); void compCompileFinish(); int compCompileHelper(CORINFO_MODULE_HANDLE classPtr, COMP_HANDLE compHnd, CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags, + JitFlags* compileFlags, CorInfoInstantiationVerification instVerInfo); ArenaAllocator* compGetAllocator(); #if MEASURE_MEM_ALLOC + + static bool s_dspMemStats; // Display per-phase memory statistics for every function + struct MemStats { unsigned allocCnt; // # of allocs @@ -8195,9 +8285,8 @@ public: void compDspSrcLinesByLineNum(unsigned line, bool seek = false); #endif // DEBUG -//------------------------------------------------------------------------- + //------------------------------------------------------------------------- -#ifdef DEBUGGING_SUPPORT typedef ListNode<VarScopeDsc*> VarScopeListNode; struct VarScopeMapInfo @@ 
-8255,8 +8344,6 @@ public: void compDispScopeLists(); #endif // DEBUG -#endif // DEBUGGING_SUPPORT - bool compIsProfilerHookNeeded(); //------------------------------------------------------------------------- @@ -8299,7 +8386,7 @@ public: protected: size_t compMaxUncheckedOffsetForNullObject; - void compInitOptions(CORJIT_FLAGS* compileFlags); + void compInitOptions(JitFlags* compileFlags); void compSetProcessor(); void compInitDebuggingInfo(); @@ -8307,16 +8394,22 @@ protected: #ifdef _TARGET_ARMARCH_ bool compRsvdRegCheck(FrameLayoutState curState); #endif - void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags); + void compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags* compileFlags); - // Data required for generating profiler Enter/Leave/TailCall hooks - CLANG_FORMAT_COMMENT_ANCHOR; + // Clear annotations produced during optimizations; to be used between iterations when repeating opts. + void ResetOptAnnotations(); + + // Regenerate loop descriptors; to be used between iterations when repeating opts. + void RecomputeLoopInfo(); #ifdef PROFILING_SUPPORTED + // Data required for generating profiler Enter/Leave/TailCall hooks + bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method void* compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle #endif + #ifdef _TARGET_AMD64_ bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue #endif @@ -8692,6 +8785,18 @@ private: #endif inline void EndPhase(Phases phase); // Indicate the end of the given phase. +#if MEASURE_CLRAPI_CALLS + // Thin wrappers that call into JitTimer (if present). + inline void CLRApiCallEnter(unsigned apix); + inline void CLRApiCallLeave(unsigned apix); + +public: + inline void CLR_API_Enter(API_ICorJitInfo_Names ename); + inline void CLR_API_Leave(API_ICorJitInfo_Names ename); + +private: +#endif + #if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM) // These variables are associated with maintaining SQM data about compile time. unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index eb8eb19c68..e8358fd2ab 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -473,10 +473,17 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) #endif // !FEATURE_EH_FUNCLETS -/***************************************************************************** - * - * Map a register mask to a register number - */ +//------------------------------------------------------------------------------ +// genRegNumFromMask : Maps a single register mask to a register number. +// +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the register contained in the mask. +// +// Assumptions: +// The mask contains one and only one register. inline regNumber genRegNumFromMask(regMaskTP mask) { @@ -768,7 +775,8 @@ inline double getR8LittleEndian(const BYTE* ptr) /***************************************************************************** * - * Return the bitmask to use in the EXPSET_TP for the CSE with the given CSE index. + * Return the normalized index to use in the EXPSET_TP for the CSE with + * the given CSE index. 
* Each GenTree has the following field: * signed char gtCSEnum; // 0 or the CSE index (negated if def) * So zero is reserved to mean this node is not a CSE @@ -777,15 +785,15 @@ inline double getR8LittleEndian(const BYTE* ptr) * This precondition is checked by the assert on the first line of this method. */ -inline EXPSET_TP genCSEnum2bit(unsigned index) +inline unsigned int genCSEnum2bit(unsigned index) { assert((index > 0) && (index <= EXPSET_SZ)); - return ((EXPSET_TP)1 << (index - 1)); + return (index - 1); } #ifdef DEBUG -const char* genES2str(EXPSET_TP set); +const char* genES2str(BitVecTraits* traits, EXPSET_TP set); const char* refCntWtd2str(unsigned refCntWtd); #endif @@ -870,6 +878,10 @@ inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode) #endif #endif +#if COUNT_AST_OPERS + InterlockedIncrement(&s_gtNodeCounts[oper]); +#endif + #ifdef DEBUG gtSeqNum = 0; gtTreeID = JitTls::GetCompiler()->compGenTreeID++; @@ -1285,11 +1297,11 @@ inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate) assert(GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_LARGE); - assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE); + assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE); assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || (gtDebugFlags & GTF_DEBUG_NODE_LARGE)); - gtOper = oper; + SetOperRaw(oper); #ifdef DEBUG // Maintain the invariant that unary operators always have NULL gtOp2. @@ -1327,6 +1339,9 @@ inline void GenTree::CopyFrom(const GenTree* src, Compiler* comp) assert((gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[src->gtOper] == TREE_NODE_SZ_SMALL); GenTreePtr prev = gtPrev; GenTreePtr next = gtNext; + + RecordOperBashing(OperGet(), src->OperGet()); // nop unless NODEBASH_STATS is enabled + // The VTable pointer is copied intentionally here memcpy((void*)this, (void*)src, src->GetNodeSize()); this->gtPrev = prev; @@ -1373,7 +1388,7 @@ inline void GenTree::InitNodeSize() inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate) { - gtOper = oper; + SetOperRaw(oper); if (vnUpdate == CLEAR_VN) { @@ -1384,6 +1399,7 @@ inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate) inline void GenTree::CopyFrom(GenTreePtr src) { + RecordOperBashing(OperGet(), src->OperGet()); // nop unless NODEBASH_STATS is enabled *this = *src; #ifdef DEBUG gtSeqNum = 0; @@ -1405,6 +1421,16 @@ inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_ty #endif // SMALL_TREE_NODES /*****************************************************************************/ +/*****************************************************************************/ + +inline void GenTree::SetOperRaw(genTreeOps oper) +{ + // Please do not do anything here other than assign to gtOper (debug-only + // code is OK, but should be kept to a minimum). 
+ RecordOperBashing(OperGet(), oper); // nop unless NODEBASH_STATS is enabled + gtOper = oper; +} + inline void GenTree::SetOperResetFlags(genTreeOps oper) { SetOper(oper); @@ -1446,7 +1472,7 @@ inline void GenTree::ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate) inline void GenTree::ChangeOperUnchecked(genTreeOps oper) { - gtOper = oper; // Trust the caller and don't use SetOper() + SetOperRaw(oper); // Trust the caller and don't use SetOper() gtFlags &= GTF_COMMON_MASK; } @@ -1579,7 +1605,7 @@ inline unsigned Compiler::lvaGrabTemp(bool shortLifetime DEBUGARG(const char* re #if 0 // TODO-Cleanup: Enable this and test. -#ifdef DEBUG +#ifdef DEBUG // Fill the old table with junks. So to detect the un-intended use. memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable)); #endif @@ -1655,7 +1681,7 @@ inline unsigned Compiler::lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason) } #if 0 -#ifdef DEBUG +#ifdef DEBUG // TODO-Cleanup: Enable this and test. // Fill the old table with junks. So to detect the un-intended use. memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable)); @@ -3909,7 +3935,7 @@ inline bool Compiler::IsSharedStaticHelper(GenTreePtr tree) helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS || helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS || #ifdef FEATURE_READYTORUN_COMPILER - helper == CORINFO_HELP_READYTORUN_STATIC_BASE || + helper == CORINFO_HELP_READYTORUN_STATIC_BASE || helper == CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE || #endif helper == CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS; #if 0 @@ -3944,7 +3970,7 @@ inline bool jitStaticFldIsGlobAddr(CORINFO_FIELD_HANDLE fldHnd) return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS); } -#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) +#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(FEATURE_TRACELOGGING) inline bool Compiler::eeIsNativeMethod(CORINFO_METHOD_HANDLE method) { @@ -4087,16 +4113,12 @@ inline bool Compiler::compIsProfilerHookNeeded() { #ifdef PROFILING_SUPPORTED return compProfilerHookNeeded - -#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_) // IL stubs are excluded by VM and we need to do the same even running // under a complus env hook to generate profiler hooks - || (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB)) -#endif - ; -#else // PROFILING_SUPPORTED + || (opts.compJitELTHookEnabled && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)); +#else // !PROFILING_SUPPORTED return false; -#endif +#endif // !PROFILING_SUPPORTED } /***************************************************************************** @@ -4185,7 +4207,7 @@ inline bool Compiler::impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart, inline bool Compiler::compIsForImportOnly() { - return ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0); + return opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY); } /***************************************************************************** @@ -4352,10 +4374,12 @@ inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc) { if (varDsc->lvTracked && (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)) { + // Stack parameters are always untracked w.r.t. 
GC reportings + const bool isStackParam = varDsc->lvIsParam && !varDsc->lvIsRegArg; #ifdef _TARGET_AMD64_ - return !lvaIsFieldOfDependentlyPromotedStruct(varDsc); + return !isStackParam && !lvaIsFieldOfDependentlyPromotedStruct(varDsc); #else // !_TARGET_AMD64_ - return true; + return !isStackParam; #endif // !_TARGET_AMD64_ } else @@ -4367,8 +4391,10 @@ inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc) inline void Compiler::EndPhase(Phases phase) { #if defined(FEATURE_JIT_METHOD_PERF) - if (pCompJitTimer != NULL) + if (pCompJitTimer != nullptr) + { pCompJitTimer->EndPhase(phase); + } #endif #if DUMP_FLOWGRAPHS fgDumpFlowGraph(phase); @@ -4405,6 +4431,36 @@ inline void Compiler::EndPhase(Phases phase) } /*****************************************************************************/ +#if MEASURE_CLRAPI_CALLS + +inline void Compiler::CLRApiCallEnter(unsigned apix) +{ + if (pCompJitTimer != nullptr) + { + pCompJitTimer->CLRApiCallEnter(apix); + } +} +inline void Compiler::CLRApiCallLeave(unsigned apix) +{ + if (pCompJitTimer != nullptr) + { + pCompJitTimer->CLRApiCallLeave(apix); + } +} + +inline void Compiler::CLR_API_Enter(API_ICorJitInfo_Names ename) +{ + CLRApiCallEnter(ename); +} + +inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) +{ + CLRApiCallLeave(ename); +} + +#endif // MEASURE_CLRAPI_CALLS + +/*****************************************************************************/ bool Compiler::fgExcludeFromSsa(unsigned lclNum) { if (opts.MinOpts()) diff --git a/src/jit/compphases.h b/src/jit/compphases.h index f193d04647..ac1bb636ff 100644 --- a/src/jit/compphases.h +++ b/src/jit/compphases.h @@ -22,7 +22,12 @@ CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1) CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1) CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1) -CompPhaseNameMacro(PHASE_MORPH, "Morph", "MORPH", false, -1) +CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1) +CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1) +CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1) +CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1) +CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1) +CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1) CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1) CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1) CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1) @@ -55,7 +60,7 @@ CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", #if FEATURE_VALNUM_CSE CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1) -#endif +#endif CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1) #if ASSERTION_PROP @@ -86,6 +91,12 @@ CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1) CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1) CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1) + +#if MEASURE_CLRAPI_CALLS +// The following is a "pseudo-phase" - it aggregates timing info +// for calls through ICorJitInfo across all "real" phases. 
+CompPhaseNameMacro(PHASE_CLR_API, "CLR API calls", "CLR-API", false, -1) +#endif // clang-format on #undef CompPhaseNameMacro diff --git a/src/jit/crossgen/CMakeLists.txt b/src/jit/crossgen/CMakeLists.txt index f79d9e72ce..6440e91a04 100644 --- a/src/jit/crossgen/CMakeLists.txt +++ b/src/jit/crossgen/CMakeLists.txt @@ -1,7 +1,7 @@ include(${CLR_DIR}/crossgen.cmake) -if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM) +if(CLR_CMAKE_TARGET_ARCH_ARM) add_definitions(-DLEGACY_BACKEND) endif() -add_library_clr(${JIT_BASE_NAME}_crossgen ${SOURCES}) +add_library_clr(clrjit_crossgen ${SOURCES}) diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp index cf66487367..98b8b081fc 100644 --- a/src/jit/decomposelongs.cpp +++ b/src/jit/decomposelongs.cpp @@ -65,7 +65,7 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block) assert(block->isEmpty() || block->IsLIR()); m_blockWeight = block->getBBWeight(m_compiler); - m_range = &LIR::AsRange(block); + m_range = &LIR::AsRange(block); DecomposeRangeHelper(); } @@ -90,7 +90,7 @@ void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LI DecomposeLongs decomposer(compiler); decomposer.m_blockWeight = blockWeight; - decomposer.m_range = &range; + decomposer.m_range = &range; decomposer.DecomposeRangeHelper(); } @@ -111,13 +111,7 @@ void DecomposeLongs::DecomposeRangeHelper() GenTree* node = Range().FirstNonPhiNode(); while (node != nullptr) { - LIR::Use use; - if (!Range().TryGetUse(node, &use)) - { - use = LIR::Use::GetDummyUse(Range(), node); - } - - node = DecomposeNode(use); + node = DecomposeNode(node); } assert(Range().CheckLIR(m_compiler)); @@ -132,10 +126,8 @@ void DecomposeLongs::DecomposeRangeHelper() // Return Value: // The next node to process. // -GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) +GenTree* DecomposeLongs::DecomposeNode(GenTree* tree) { - GenTree* tree = use.Def(); - // Handle the case where we are implicitly using the lower half of a long lclVar. 
if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal()) { @@ -171,14 +163,15 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) } #endif // DEBUG + LIR::Use use; + if (!Range().TryGetUse(tree, &use)) + { + use = LIR::Use::GetDummyUse(Range(), tree); + } + GenTree* nextNode = nullptr; switch (tree->OperGet()) { - case GT_PHI: - case GT_PHI_ARG: - nextNode = tree->gtNext; - break; - case GT_LCL_VAR: nextNode = DecomposeLclVar(use); break; @@ -212,8 +205,7 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) break; case GT_STORE_LCL_FLD: - assert(tree->gtOp.gtOp1->OperGet() == GT_LONG); - NYI("st.lclFld of of TYP_LONG"); + nextNode = DecomposeStoreLclFld(use); break; case GT_IND: @@ -239,23 +231,11 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) break; case GT_MUL: - NYI("Arithmetic binary operators on TYP_LONG - GT_MUL"); - break; - - case GT_DIV: - NYI("Arithmetic binary operators on TYP_LONG - GT_DIV"); - break; - - case GT_MOD: - NYI("Arithmetic binary operators on TYP_LONG - GT_MOD"); - break; - - case GT_UDIV: - NYI("Arithmetic binary operators on TYP_LONG - GT_UDIV"); + nextNode = DecomposeMul(use); break; case GT_UMOD: - NYI("Arithmetic binary operators on TYP_LONG - GT_UMOD"); + nextNode = DecomposeUMod(use); break; case GT_LSH: @@ -266,11 +246,7 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) case GT_ROL: case GT_ROR: - NYI("Arithmetic binary operators on TYP_LONG - ROTATE"); - break; - - case GT_MULHI: - NYI("Arithmetic binary operators on TYP_LONG - MULHI"); + nextNode = DecomposeRotate(use); break; case GT_LOCKADD: @@ -288,6 +264,37 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) } } + // If we replaced the argument to a GT_FIELD_LIST element with a GT_LONG node, split that field list + // element into two elements: one for each half of the GT_LONG. + if ((use.Def()->OperGet() == GT_LONG) && !use.IsDummyUse() && (use.User()->OperGet() == GT_FIELD_LIST)) + { + GenTreeOp* value = use.Def()->AsOp(); + Range().Remove(value); + + // The node returned by `use.User()` is the head of the field list. We need to find the actual node that uses + // the `GT_LONG` so that we can split it. + GenTreeFieldList* listNode = use.User()->AsFieldList(); + for (; listNode != nullptr; listNode = listNode->Rest()) + { + if (listNode->Current() == value) + { + break; + } + } + + assert(listNode != nullptr); + GenTree* rest = listNode->gtOp2; + + GenTreeFieldList* loNode = listNode; + loNode->gtOp1 = value->gtOp1; + loNode->gtFieldType = TYP_INT; + + GenTreeFieldList* hiNode = + new (m_compiler, GT_FIELD_LIST) GenTreeFieldList(value->gtOp2, loNode->gtFieldOffset + 4, TYP_INT, loNode); + + hiNode->gtOp2 = rest; + } + #ifdef DEBUG if (m_compiler->verbose) { @@ -308,23 +315,25 @@ GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use) // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. // loResult - the decomposed low part -// hiResult - the decomposed high part. This must follow loResult in the linear order, -// as the new GT_LONG node will be inserted immediately after it. +// hiResult - the decomposed high part +// insertResultAfter - the node that the GT_LONG should be inserted after // // Return Value: // The next node to process. 
// -GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult) +GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, + GenTree* loResult, + GenTree* hiResult, + GenTree* insertResultAfter) { assert(use.IsInitialized()); assert(loResult != nullptr); assert(hiResult != nullptr); assert(Range().Contains(loResult)); assert(Range().Contains(hiResult)); - assert(loResult->Precedes(hiResult)); GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult); - Range().InsertAfter(hiResult, gtLong); + Range().InsertAfter(insertResultAfter, gtLong); use.ReplaceWith(m_compiler, gtLong); @@ -366,8 +375,6 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use) } else { - noway_assert(varDsc->lvLRACandidate == false); - loResult->SetOper(GT_LCL_FLD); loResult->AsLclFld()->gtLclOffs = 0; loResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); @@ -380,7 +387,7 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use) m_compiler->lvaIncRefCnts(loResult); m_compiler->lvaIncRefCnts(hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -404,7 +411,7 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use) GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -423,59 +430,118 @@ GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use) GenTree* tree = use.Def(); GenTree* rhs = tree->gtGetOp1(); - if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL)) + if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL) || + ((rhs->OperGet() == GT_MUL_LONG) && (rhs->gtFlags & GTF_MUL_64RSLT) != 0)) { // GT_CALLs are not decomposed, so will not be converted to GT_LONG // GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal + // GT_MULs are not decomposed, so will not be converted to GT_LONG return tree->gtNext; } noway_assert(rhs->OperGet() == GT_LONG); + unsigned varNum = tree->AsLclVarCommon()->gtLclNum; LclVarDsc* varDsc = m_compiler->lvaTable + varNum; + if (!varDsc->lvPromoted) + { + // We cannot decompose a st.lclVar that is not promoted because doing so + // changes its liveness semantics. For example, consider the following + // decomposition of a st.lclVar into two st.lclFlds: + // + // Before: + // + // /--* t0 int + // +--* t1 int + // t2 = * gt_long long + // + // /--* t2 long + // * st.lclVar long V0 + // + // After: + // /--* t0 int + // * st.lclFld int V0 [+0] + // + // /--* t1 int + // * st.lclFld int V0 [+4] + // + // Before decomposition, the `st.lclVar` is a simple def of `V0`. After + // decomposition, each `st.lclFld` is a partial def of `V0`. This partial + // def is treated as both a use and a def of the appropriate lclVar. This + // difference will affect any situation in which the liveness of a variable + // at a def matters (e.g. dead store elimination, live-in sets, etc.). As + // a result, we leave these stores as-is and generate the decomposed store + // in the code generator. + // + // NOTE: this does extend the lifetime of the low half of the `GT_LONG` + // node as compared to the decomposed form. 
If we start doing more code + // motion in the backend, this may cause some CQ issues and some sort of + // decomposition could be beneficial. + return tree->gtNext; + } + + assert(varDsc->lvFieldCnt == 2); m_compiler->lvaDecRefCnts(tree); - GenTree* loRhs = rhs->gtGetOp1(); - GenTree* hiRhs = rhs->gtGetOp2(); - GenTree* hiStore = m_compiler->gtNewLclLNode(varNum, TYP_INT); + GenTreeOp* value = rhs->AsOp(); + Range().Remove(value); - if (varDsc->lvPromoted) - { - assert(varDsc->lvFieldCnt == 2); + const unsigned loVarNum = varDsc->lvFieldLclStart; + GenTree* loStore = tree; + loStore->AsLclVarCommon()->SetLclNum(loVarNum); + loStore->gtOp.gtOp1 = value->gtOp1; + loStore->gtType = TYP_INT; - unsigned loVarNum = varDsc->lvFieldLclStart; - unsigned hiVarNum = loVarNum + 1; - tree->AsLclVarCommon()->SetLclNum(loVarNum); - hiStore->SetOper(GT_STORE_LCL_VAR); - hiStore->AsLclVarCommon()->SetLclNum(hiVarNum); - } - else - { - noway_assert(varDsc->lvLRACandidate == false); + const unsigned hiVarNum = loVarNum + 1; + GenTree* hiStore = m_compiler->gtNewLclLNode(hiVarNum, TYP_INT); + hiStore->SetOper(GT_STORE_LCL_VAR); + hiStore->gtOp.gtOp1 = value->gtOp2; + hiStore->gtFlags |= GTF_VAR_DEF; - tree->SetOper(GT_STORE_LCL_FLD); - tree->AsLclFld()->gtLclOffs = 0; - tree->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); + m_compiler->lvaIncRefCnts(loStore); + m_compiler->lvaIncRefCnts(hiStore); - hiStore->SetOper(GT_STORE_LCL_FLD); - hiStore->AsLclFld()->gtLclOffs = 4; - hiStore->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); - } + Range().InsertAfter(tree, hiStore); - // 'tree' is going to steal the loRhs node for itself, so we need to remove the - // GT_LONG node from the threading. - Range().Remove(rhs); + return hiStore->gtNext; +} - tree->gtOp.gtOp1 = loRhs; - tree->gtType = TYP_INT; +//------------------------------------------------------------------------ +// DecomposeStoreLclFld: Decompose GT_STORE_LCL_FLD. +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use) +{ + assert(use.IsInitialized()); + assert(use.Def()->OperGet() == GT_STORE_LCL_FLD); - hiStore->gtOp.gtOp1 = hiRhs; - hiStore->gtFlags |= GTF_VAR_DEF; + GenTreeLclFld* store = use.Def()->AsLclFld(); + + GenTreeOp* value = store->gtOp1->AsOp(); + assert(value->OperGet() == GT_LONG); + Range().Remove(value); + + // The original store node will be repurposed to store the low half of the GT_LONG. + GenTreeLclFld* loStore = store; + loStore->gtOp1 = value->gtOp1; + loStore->gtType = TYP_INT; + loStore->gtFlags |= GTF_VAR_USEASG; - m_compiler->lvaIncRefCnts(tree); + // Create the store for the upper half of the GT_LONG and insert it after the low store. + GenTreeLclFld* hiStore = m_compiler->gtNewLclFldNode(loStore->gtLclNum, TYP_INT, loStore->gtLclOffs + 4); + hiStore->SetOper(GT_STORE_LCL_FLD); + hiStore->gtOp1 = value->gtOp2; + hiStore->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG); + + // Bump the ref count for the destination. 
m_compiler->lvaIncRefCnts(hiStore); - Range().InsertAfter(tree, hiStore); + Range().InsertAfter(loStore, hiStore); return hiStore->gtNext; } @@ -494,35 +560,103 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) assert(use.IsInitialized()); assert(use.Def()->OperGet() == GT_CAST); - GenTree* tree = use.Def(); + GenTree* cast = use.Def()->AsCast(); GenTree* loResult = nullptr; GenTree* hiResult = nullptr; - assert(tree->gtPrev == tree->gtGetOp1()); - NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow"); - switch (tree->AsCast()->CastFromType()) + var_types srcType = cast->CastFromType(); + var_types dstType = cast->CastToType(); + + if ((cast->gtFlags & GTF_UNSIGNED) != 0) + { + srcType = genUnsignedType(srcType); + } + + if (varTypeIsLong(srcType)) + { + if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType))) + { + GenTree* srcOp = cast->gtGetOp1(); + noway_assert(srcOp->OperGet() == GT_LONG); + GenTree* loSrcOp = srcOp->gtGetOp1(); + GenTree* hiSrcOp = srcOp->gtGetOp2(); + + // + // When casting between long types an overflow check is needed only if the types + // have different signedness. In both cases (long->ulong and ulong->long) we only + // need to check if the high part is negative or not. Use the existing cast node + // to perform a int->uint cast of the high part to take advantage of the overflow + // check provided by codegen. + // + + loResult = loSrcOp; + + hiResult = cast; + hiResult->gtType = TYP_INT; + hiResult->AsCast()->gtCastType = TYP_UINT; + hiResult->gtFlags &= ~GTF_UNSIGNED; + hiResult->gtOp.gtOp1 = hiSrcOp; + + Range().Remove(cast); + Range().Remove(srcOp); + Range().InsertAfter(hiSrcOp, hiResult); + } + else + { + NYI("Unimplemented long->long no-op cast decomposition"); + } + } + else if (varTypeIsIntegralOrI(srcType)) { - case TYP_INT: - if (tree->gtFlags & GTF_UNSIGNED) + if (cast->gtOverflow() && !varTypeIsUnsigned(srcType) && varTypeIsUnsigned(dstType)) + { + // + // An overflow check is needed only when casting from a signed type to ulong. + // Change the cast type to uint to take advantage of the overflow check provided + // by codegen and then zero extend the resulting uint to ulong. 
+ // + + loResult = cast; + loResult->AsCast()->gtCastType = TYP_UINT; + loResult->gtType = TYP_INT; + + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); + + Range().InsertAfter(loResult, hiResult); + } + else + { + if (varTypeIsUnsigned(srcType)) { - loResult = tree->gtGetOp1(); - Range().Remove(tree); + loResult = cast->gtGetOp1(); + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); - hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0); + Range().Remove(cast); Range().InsertAfter(loResult, hiResult); } else { - NYI("Lowering of signed cast TYP_INT->TYP_LONG"); - } - break; + LIR::Use src(Range(), &(cast->gtOp.gtOp1), cast); + unsigned lclNum = src.ReplaceWithLclVar(m_compiler, m_blockWeight); - default: - NYI("Unimplemented type for Lowering of cast to TYP_LONG"); - break; + loResult = src.Def(); + + GenTree* loCopy = m_compiler->gtNewLclvNode(lclNum, TYP_INT); + GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, loCopy, shiftBy); + + Range().Remove(cast); + Range().InsertAfter(loResult, loCopy, shiftBy, hiResult); + m_compiler->lvaIncRefCnts(loCopy); + } + } + } + else + { + NYI("Unimplemented cast decomposition"); } - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -549,7 +683,7 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use) GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -567,35 +701,7 @@ GenTree* DecomposeLongs::DecomposeCall(LIR::Use& use) assert(use.Def()->OperGet() == GT_CALL); // We only need to force var = call() if the call's result is used. - if (use.IsDummyUse()) - return use.Def()->gtNext; - - GenTree* user = use.User(); - if (user->OperGet() == GT_STORE_LCL_VAR) - { - // If parent is already a STORE_LCL_VAR, we can skip it if - // it is already marked as lvIsMultiRegRet. - unsigned varNum = user->AsLclVarCommon()->gtLclNum; - if (m_compiler->lvaTable[varNum].lvIsMultiRegRet) - { - return use.Def()->gtNext; - } - else if (!m_compiler->lvaTable[varNum].lvPromoted) - { - // If var wasn't promoted, we can just set lvIsMultiRegRet. - m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; - return use.Def()->gtNext; - } - } - - GenTree* originalNode = use.Def(); - - // Otherwise, we need to force var = call() - unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight); - m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; - - // Decompose the new LclVar use - return DecomposeLclVar(use); + return StoreNodeToVar(use); } //------------------------------------------------------------------------ @@ -627,7 +733,7 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use) // + --* t155 long // * storeIndir long - GenTree* gtLong = tree->gtOp.gtOp2; + GenTree* gtLong = tree->gtOp.gtOp2; // Save address to a temp. It is used in storeIndLow and storeIndHigh trees. 
LIR::Use address(Range(), &tree->gtOp.gtOp1, tree); @@ -721,12 +827,13 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use) GenTreePtr addrHigh = new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT)); GenTreePtr indHigh = new (m_compiler, GT_IND) GenTreeIndir(GT_IND, TYP_INT, addrHigh, nullptr); + indHigh->gtFlags |= (indLow->gtFlags & (GTF_GLOB_REF | GTF_EXCEPT | GTF_IND_FLAGS)); m_compiler->lvaIncRefCnts(addrBaseHigh); Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh); - return FinalizeDecomposition(use, indLow, indHigh); + return FinalizeDecomposition(use, indLow, indHigh, indHigh); } //------------------------------------------------------------------------ @@ -758,7 +865,7 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use) GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr); Range().InsertAfter(loResult, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -779,14 +886,6 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) GenTree* gtLong = tree->gtGetOp1(); noway_assert(gtLong->OperGet() == GT_LONG); - LIR::Use op1(Range(), >Long->gtOp.gtOp1, gtLong); - op1.ReplaceWithLclVar(m_compiler, m_blockWeight); - - LIR::Use op2(Range(), >Long->gtOp.gtOp2, gtLong); - op2.ReplaceWithLclVar(m_compiler, m_blockWeight); - - // Neither GT_NEG nor the introduced temporaries have side effects. - tree->gtFlags &= ~GTF_ALL_EFFECT; GenTree* loOp1 = gtLong->gtGetOp1(); GenTree* hiOp1 = gtLong->gtGetOp2(); @@ -799,11 +898,10 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use) GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT); GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero); GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust); - hiResult->gtFlags = tree->gtFlags; Range().InsertAfter(loResult, zero, hiAdjust, hiResult); - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ @@ -864,14 +962,19 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use) } } - return FinalizeDecomposition(use, loResult, hiResult); + return FinalizeDecomposition(use, loResult, hiResult, hiResult); } //------------------------------------------------------------------------ -// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use -// the shift helper functions, so we here convert the shift into a helper call by -// pulling its arguments out of linear order and making them the args to a call, then -// replacing the original node with the new call. +// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes being shifted +// by a constant int, we can inspect the shift amount and decompose to the appropriate +// node types, generating a shl/shld pattern for GT_LSH, a shrd/shr pattern for GT_RSZ, +// and a shrd/sar pattern for GT_SHR for most shift amounts. Shifting by 0, >= 32 and +// >= 64 are special cased to produce better code patterns. +// +// For all other shift nodes, we need to use the shift helper functions, so we here convert +// the shift into a helper call by pulling its arguments out of linear order and making +// them the args to a call, then replacing the original node with the new call. 
// // Arguments: // use - the LIR::Use object for the def that needs to be decomposed. @@ -883,66 +986,646 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use) { assert(use.IsInitialized()); - GenTree* tree = use.Def(); - GenTree* gtLong = tree->gtGetOp1(); - genTreeOps oper = tree->OperGet(); + GenTree* tree = use.Def(); + GenTree* gtLong = tree->gtGetOp1(); + GenTree* loOp1 = gtLong->gtGetOp1(); + GenTree* hiOp1 = gtLong->gtGetOp2(); + GenTree* shiftByOp = tree->gtGetOp2(); + + genTreeOps oper = tree->OperGet(); + genTreeOps shiftByOper = shiftByOp->OperGet(); assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)); - LIR::Use loOp1Use(Range(), >Long->gtOp.gtOp1, gtLong); - loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + // If we are shifting by a constant int, we do not want to use a helper, instead, we decompose. + if (shiftByOper == GT_CNS_INT) + { + unsigned int count = shiftByOp->gtIntCon.gtIconVal; + Range().Remove(shiftByOp); - LIR::Use hiOp1Use(Range(), >Long->gtOp.gtOp2, gtLong); - hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + if (count == 0) + { + GenTree* next = tree->gtNext; + // Remove tree and don't do anything else. + Range().Remove(tree); + use.ReplaceWith(m_compiler, gtLong); + return next; + } - LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree); - shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight); + GenTree* loResult; + GenTree* hiResult; - GenTree* loOp1 = gtLong->gtGetOp1(); - GenTree* hiOp1 = gtLong->gtGetOp2(); + GenTree* insertAfter; - GenTree* shiftWidthOp = tree->gtGetOp2(); + switch (oper) + { + case GT_LSH: + { + Range().Remove(hiOp1); + if (count < 32) + { + // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce: + // reg1 = lo + // shl lo, shift + // shld hi, reg1, shift + + Range().Remove(gtLong); + loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, >Long->gtOp.gtOp1); + unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum; + Range().Remove(loOp1); + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + + loResult = m_compiler->gtNewOperNode(GT_LSH, TYP_INT, loOp1, shiftByLo); + + // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to + // generate the shld instruction + GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT); + GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1); + hiResult = m_compiler->gtNewOperNode(GT_LSH_HI, TYP_INT, hiOp, shiftByHi); + + m_compiler->lvaIncRefCnts(loCopy); + + Range().InsertBefore(tree, loCopy, hiOp1, hiOp); + Range().InsertBefore(tree, shiftByHi, hiResult); + Range().InsertBefore(tree, loOp1, shiftByLo, loResult); + + insertAfter = loResult; + } + else + { + assert(count >= 32); + + if (count < 64) + { + if (count == 32) + { + // Move loOp1 into hiResult (shift of 32 bits is just a mov of lo to hi) + // We need to make sure that we save lo to a temp variable so that we don't overwrite lo + // before saving it to hi in the case that we are doing an inplace shift. I.e.: + // x = x << 32 + + LIR::Use loOp1Use(Range(), >Long->gtOp.gtOp1, gtLong); + loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + + hiResult = loOp1Use.Def(); + Range().Remove(gtLong); + } + else + { + Range().Remove(gtLong); + Range().Remove(loOp1); + assert(count > 32 && count < 64); + + // Move loOp1 into hiResult, do a GT_LSH with count - 32. 
+ // We will compute hiResult before loResult in this case, so we don't need to store lo to a + // temp + GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy); + Range().InsertBefore(tree, loOp1, shiftBy, hiResult); + } + } + else + { + Range().Remove(gtLong); + Range().Remove(loOp1); + assert(count >= 64); + + // Zero out hi (shift of >= 64 bits moves all the bits out of the two registers) + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, hiResult); + } + + // Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult) + loResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, loResult); + + insertAfter = loResult; + } + } + break; + case GT_RSZ: + { + Range().Remove(gtLong); + + if (count < 32) + { + // Hi is a GT_RSZ, lo is a GT_RSH_LO. Will produce: + // reg1 = hi + // shrd lo, reg1, shift + // shr hi, shift + + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp2); + unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum; + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + + m_compiler->lvaIncRefCnts(hiCopy); + + hiResult = m_compiler->gtNewOperNode(GT_RSZ, TYP_INT, hiOp1, shiftByHi); + + // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to + // generate the shrd instruction + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy); + loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo); + + Range().InsertBefore(tree, hiCopy, loOp); + Range().InsertBefore(tree, shiftByLo, loResult); + Range().InsertBefore(tree, shiftByHi, hiResult); + } + else + { + Range().Remove(loOp1); + Range().Remove(hiOp1); + assert(count >= 32); + if (count < 64) + { + if (count == 32) + { + // Move hiOp1 into loResult. + loResult = hiOp1; + Range().InsertBefore(tree, loResult); + } + else + { + assert(count > 32 && count < 64); + + // Move hiOp1 into loResult, do a GT_RSZ with count - 32. + GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, hiOp1, shiftBy, loResult); + } + } + else + { + assert(count >= 64); + + // Zero out lo + loResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, loResult); + } + + // Zero out hi + hiResult = m_compiler->gtNewZeroConNode(TYP_INT); + Range().InsertBefore(tree, hiResult); + } + + insertAfter = hiResult; + } + break; + case GT_RSH: + { + Range().Remove(gtLong); + Range().Remove(loOp1); + + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp2); + unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum; + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + Range().Remove(hiOp1); + + if (count < 32) + { + // Hi is a GT_RSH, lo is a GT_RSH_LO. Will produce: + // reg1 = hi + // shrd lo, reg1, shift + // sar hi, shift + + GenTree* shiftByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* shiftByLo = m_compiler->gtNewIconNode(count, TYP_INT); + m_compiler->lvaIncRefCnts(hiCopy); + + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftByHi); + + // Create a GT_LONG that contains loOp1 and hiCopy. 
This will be used in codegen to + // generate the shrd instruction + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy); + loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo); + + Range().InsertBefore(tree, loOp1, hiCopy, loOp); + Range().InsertBefore(tree, shiftByLo, loResult); + Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult); + } + else + { + assert(count >= 32); + if (count < 64) + { + if (count == 32) + { + // Move hiOp1 into loResult. + loResult = hiOp1; + Range().InsertBefore(tree, loResult); + } + else + { + assert(count > 32 && count < 64); + + // Move hiOp1 into loResult, do a GT_RSH with count - 32. + GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, hiOp1, shiftBy, loResult); + } + + // Propagate sign bit in hiResult + GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy); + Range().InsertBefore(tree, shiftBy, hiCopy, hiResult); + + m_compiler->lvaIncRefCnts(hiCopy); + } + else + { + assert(count >= 64); + + // Propagate sign bit in loResult + GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + loResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy); + Range().InsertBefore(tree, hiCopy, loShiftBy, loResult); + + // Propagate sign bit in hiResult + GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT); + hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy); + Range().InsertBefore(tree, shiftBy, hiOp1, hiResult); + + m_compiler->lvaIncRefCnts(hiCopy); + } + } + + insertAfter = hiResult; + } + break; + default: + unreached(); + } - Range().Remove(gtLong); - Range().Remove(loOp1); - Range().Remove(hiOp1); + // Remove tree from Range + Range().Remove(tree); - Range().Remove(shiftWidthOp); + return FinalizeDecomposition(use, loResult, hiResult, insertAfter); + } + else + { + // arguments are single used, but LIR call can work only with local vars. + shiftByOp = RepresentOpAsLocalVar(shiftByOp, tree, &tree->gtOp.gtOp2); + loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, >Long->gtOp.gtOp1); + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp2); - // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen - // and generate the shift instructions ourselves there, rather than replacing it with a helper call. 
+ Range().Remove(shiftByOp); + Range().Remove(gtLong); + Range().Remove(loOp1); + Range().Remove(hiOp1); - unsigned helper; + unsigned helper; - switch (oper) + switch (oper) + { + case GT_LSH: + helper = CORINFO_HELP_LLSH; + break; + case GT_RSH: + helper = CORINFO_HELP_LRSH; + break; + case GT_RSZ: + helper = CORINFO_HELP_LRSZ; + break; + default: + unreached(); + } + + GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp); + + GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList); + call->gtFlags |= tree->gtFlags & GTF_ALL_EFFECT; + + GenTreeCall* callNode = call->AsCall(); + ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); + retTypeDesc->InitializeLongReturnType(m_compiler); + + call = m_compiler->fgMorphArgs(callNode); + Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call)); + + Range().Remove(tree); + use.ReplaceWith(m_compiler, call); + return call; + } +} + +//------------------------------------------------------------------------ +// DecomposeRotate: Decompose GT_ROL and GT_ROR with constant shift amounts. We can +// inspect the rotate amount and decompose to the appropriate node types, generating +// a shld/shld pattern for GT_ROL, a shrd/shrd pattern for GT_ROR, for most rotate +// amounts. +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use) +{ + GenTree* tree = use.Def(); + GenTree* gtLong = tree->gtGetOp1(); + GenTree* rotateByOp = tree->gtGetOp2(); + + genTreeOps oper = tree->OperGet(); + + assert((oper == GT_ROL) || (oper == GT_ROR)); + assert(rotateByOp->IsCnsIntOrI()); + + // For longs, we need to change rols into two GT_LSH_HIs and rors into two GT_RSH_LOs + // so we will get: + // + // shld lo, hi, rotateAmount + // shld hi, loCopy, rotateAmount + // + // or: + // + // shrd lo, hi, rotateAmount + // shrd hi, loCopy, rotateAmount + + if (oper == GT_ROL) { - case GT_LSH: - helper = CORINFO_HELP_LLSH; - break; - case GT_RSH: - helper = CORINFO_HELP_LRSH; - break; - case GT_RSZ: - helper = CORINFO_HELP_LRSZ; - break; - default: - unreached(); + oper = GT_LSH_HI; + } + else + { + oper = GT_RSH_LO; } - GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp); + unsigned int count = rotateByOp->gtIntCon.gtIconVal; + Range().Remove(rotateByOp); + + // Make sure the rotate amount is between 0 and 63. + assert((count < 64) && (count != 0)); + + GenTree* loResult; + GenTree* hiResult; + + if (count == 32) + { + // If the rotate amount is 32, then swap hi and lo + LIR::Use loOp1Use(Range(), >Long->gtOp.gtOp1, gtLong); + loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + + LIR::Use hiOp1Use(Range(), >Long->gtOp.gtOp2, gtLong); + hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight); + + hiResult = loOp1Use.Def(); + loResult = hiOp1Use.Def(); + gtLong->gtOp.gtOp1 = loResult; + gtLong->gtOp.gtOp2 = hiResult; + + GenTree* next = tree->gtNext; + // Remove tree and don't do anything else. 
+ Range().Remove(tree); + use.ReplaceWith(m_compiler, gtLong); + return next; + } + else + { + GenTree* loOp1; + GenTree* hiOp1; + + if (count > 32) + { + // If count > 32, we swap hi and lo, and subtract 32 from count + hiOp1 = gtLong->gtGetOp1(); + loOp1 = gtLong->gtGetOp2(); + + Range().Remove(gtLong); + loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, >Long->gtOp.gtOp2); + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp1); + + count -= 32; + } + else + { + loOp1 = gtLong->gtGetOp1(); + hiOp1 = gtLong->gtGetOp2(); + + Range().Remove(gtLong); + loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, >Long->gtOp.gtOp1); + hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, >Long->gtOp.gtOp2); + } + + unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum; + unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum; + + Range().Remove(loOp1); + Range().Remove(hiOp1); + + GenTree* rotateByHi = m_compiler->gtNewIconNode(count, TYP_INT); + GenTree* rotateByLo = m_compiler->gtNewIconNode(count, TYP_INT); + + // Create a GT_LONG that contains loOp1 and hiCopy. This will be used in codegen to + // generate the shld instruction + GenTree* hiCopy = m_compiler->gtNewLclvNode(hiOp1LclNum, TYP_INT); + GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, hiCopy, loOp1); + loResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp, rotateByLo); + + // Create a GT_LONG that contains loCopy and hiOp1. This will be used in codegen to + // generate the shld instruction + GenTree* loCopy = m_compiler->gtNewLclvNode(loOp1LclNum, TYP_INT); + GenTree* hiOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loCopy, hiOp1); + hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp, rotateByHi); + + m_compiler->lvaIncRefCnts(loCopy); + m_compiler->lvaIncRefCnts(hiCopy); + + Range().InsertBefore(tree, hiCopy, loOp1, loOp); + Range().InsertBefore(tree, rotateByLo, loResult); + Range().InsertBefore(tree, loCopy, hiOp1, hiOp); + Range().InsertBefore(tree, rotateByHi, hiResult); + + Range().Remove(tree); + + return FinalizeDecomposition(use, loResult, hiResult, hiResult); + } +} + +//------------------------------------------------------------------------ +// DecomposeMul: Decompose GT_MUL. The only GT_MULs that make it to decompose are +// those with the GTF_MUL_64RSLT flag set. These muls result in a mul instruction that +// returns its result in two registers like GT_CALLs do. Additionally, these muls are +// guaranteed to be in the form long = (long)int * (long)int. Therefore, to decompose +// these nodes, we convert them into GT_MUL_LONGs, undo the cast from int to long by +// stripping out the lo ops, and force them into the form var = mul, as we do for +// GT_CALLs. In codegen, we then produce a mul instruction that produces the result +// in edx:eax, and store those registers on the stack in genStoreLongLclVar. +// +// All other GT_MULs have been converted to helper calls in morph.cpp +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. 
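// (Illustrative sketch only, not part of this change; loA/hiA and loB/hiB are hypothetical temps.)
// For "long r = (long)a * (long)b", the node
//
//     GT_MUL[TYP_LONG, GTF_MUL_64RSLT](GT_LONG(loA, hiA), GT_LONG(loB, hiB))
//
// keeps only the low halves and becomes
//
//     GT_MUL_LONG[TYP_LONG](loA, loB)    // single mul; result produced in edx:eax
//
// which is then forced into the "var = mul" form via StoreNodeToVar so that genStoreLongLclVar
// can store edx:eax into the long local.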
+// +GenTree* DecomposeLongs::DecomposeMul(LIR::Use& use) +{ + assert(use.IsInitialized()); + + GenTree* tree = use.Def(); + genTreeOps oper = tree->OperGet(); + + assert(oper == GT_MUL); + assert((tree->gtFlags & GTF_MUL_64RSLT) != 0); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + GenTree* loOp1 = op1->gtGetOp1(); + GenTree* hiOp1 = op1->gtGetOp2(); + GenTree* loOp2 = op2->gtGetOp1(); + GenTree* hiOp2 = op2->gtGetOp2(); + + Range().Remove(hiOp1); + Range().Remove(hiOp2); + Range().Remove(op1); + Range().Remove(op2); + + // Get rid of the hi ops. We don't need them. + tree->gtOp.gtOp1 = loOp1; + tree->gtOp.gtOp2 = loOp2; + tree->SetOperRaw(GT_MUL_LONG); + + return StoreNodeToVar(use); +} + +//------------------------------------------------------------------------ +// DecomposeUMod: Decompose GT_UMOD. The only GT_UMODs that make it to decompose +// are guaranteed to be an unsigned long mod with op2 which is a cast to long from +// a constant int whose value is between 2 and 0x3fffffff. All other GT_UMODs are +// morphed into helper calls. These GT_UMODs will actually return an int value in +// RDX. In decompose, we make the lo operation a TYP_INT GT_UMOD, with op2 as the +// original lo half and op1 as a GT_LONG. We make the hi part 0, so we end up with: +// +// GT_UMOD[TYP_INT] ( GT_LONG [TYP_LONG] (loOp1, hiOp1), loOp2 [TYP_INT] ) +// +// With the expectation that we will generate: +// +// EDX = hiOp1 +// EAX = loOp1 +// reg = loOp2 +// idiv reg +// EDX is the remainder, and result of GT_UMOD +// mov hiReg = 0 +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. +// +GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use) +{ + assert(use.IsInitialized()); + + GenTree* tree = use.Def(); + genTreeOps oper = tree->OperGet(); + + assert(oper == GT_UMOD); + + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + assert(op1->OperGet() == GT_LONG); + assert(op2->OperGet() == GT_LONG); + + GenTree* loOp2 = op2->gtGetOp1(); + GenTree* hiOp2 = op2->gtGetOp2(); + + assert(loOp2->OperGet() == GT_CNS_INT); + assert(hiOp2->OperGet() == GT_CNS_INT); + assert((loOp2->gtIntCon.gtIconVal >= 2) && (loOp2->gtIntCon.gtIconVal <= 0x3fffffff)); + assert(hiOp2->gtIntCon.gtIconVal == 0); + + // Get rid of op2's hi part. We don't need it. + Range().Remove(hiOp2); + Range().Remove(op2); + + // Lo part is the GT_UMOD + GenTree* loResult = tree; + loResult->gtOp.gtOp2 = loOp2; + loResult->gtType = TYP_INT; - GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList); + // Set the high part to 0 + GenTree* hiResult = m_compiler->gtNewZeroConNode(TYP_INT); - GenTreeCall* callNode = call->AsCall(); - ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); - retTypeDesc->InitializeLongReturnType(m_compiler); + Range().InsertAfter(loResult, hiResult); - call = m_compiler->fgMorphArgs(callNode); - Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call)); - - Range().Remove(tree); - use.ReplaceWith(m_compiler, call); - return call; + return FinalizeDecomposition(use, loResult, hiResult, hiResult); +} + +//------------------------------------------------------------------------ +// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't, +// store the node to a var. Then decompose the new LclVar. +// +// Arguments: +// use - the LIR::Use object for the def that needs to be decomposed. +// +// Return Value: +// The next node to process. 
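// (Illustrative sketch for DecomposeUMod above, not part of this change; the divisor 10 is an
// arbitrary example satisfying the 2..0x3fffffff constraint.) A long unsigned mod such as
// "r = x % 10" is narrowed to an int-typed node pair:
//
//     loResult = GT_UMOD[TYP_INT](GT_LONG(loOp1, hiOp1), 10)   // remainder is produced in EDX
//     hiResult = 0                                             // high half of the long result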
+// +GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use) +{ + if (use.IsDummyUse()) + return use.Def()->gtNext; + + GenTree* tree = use.Def(); + GenTree* user = use.User(); + + if (user->OperGet() == GT_STORE_LCL_VAR) + { + // If parent is already a STORE_LCL_VAR, we can skip it if + // it is already marked as lvIsMultiRegRet. + unsigned varNum = user->AsLclVarCommon()->gtLclNum; + if (m_compiler->lvaTable[varNum].lvIsMultiRegRet) + { + return tree->gtNext; + } + else if (!m_compiler->lvaTable[varNum].lvPromoted) + { + // If var wasn't promoted, we can just set lvIsMultiRegRet. + m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; + return tree->gtNext; + } + } + + // Otherwise, we need to force var = call() + unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight); + m_compiler->lvaTable[varNum].lvIsMultiRegRet = true; + + // Decompose the new LclVar use + return DecomposeLclVar(use); +} + +//------------------------------------------------------------------------ +// Check is op already local var, if not store it to local. +// +// Arguments: +// op - GenTree* to represent as local variable +// user - user of op +// edge - edge from user to op +// +// Return Value: +// op represented as local var +// +GenTree* DecomposeLongs::RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge) +{ + if (op->OperGet() == GT_LCL_VAR) + { + return op; + } + else + { + LIR::Use opUse(Range(), edge, user); + opUse.ReplaceWithLclVar(m_compiler, m_blockWeight); + return *edge; + } } //------------------------------------------------------------------------ @@ -965,9 +1648,6 @@ genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper) case GT_SUB: return GT_SUB_HI; break; - case GT_MUL: - return GT_MUL_HI; - break; case GT_DIV: return GT_DIV_HI; break; diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h index af9b342fb2..8965a0b330 100644 --- a/src/jit/decomposelongs.h +++ b/src/jit/decomposelongs.h @@ -35,13 +35,14 @@ private: } // Driver functions - void DecomposeRangeHelper(); - GenTree* DecomposeNode(LIR::Use& use); + void DecomposeRangeHelper(); + GenTree* DecomposeNode(GenTree* tree); // Per-node type decompose cases GenTree* DecomposeLclVar(LIR::Use& use); GenTree* DecomposeLclFld(LIR::Use& use); GenTree* DecomposeStoreLclVar(LIR::Use& use); + GenTree* DecomposeStoreLclFld(LIR::Use& use); GenTree* DecomposeCast(LIR::Use& use); GenTree* DecomposeCnsLng(LIR::Use& use); GenTree* DecomposeCall(LIR::Use& use); @@ -51,10 +52,15 @@ private: GenTree* DecomposeNeg(LIR::Use& use); GenTree* DecomposeArith(LIR::Use& use); GenTree* DecomposeShift(LIR::Use& use); + GenTree* DecomposeRotate(LIR::Use& use); + GenTree* DecomposeMul(LIR::Use& use); + GenTree* DecomposeUMod(LIR::Use& use); // Helper functions - GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult); + GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter); + GenTree* RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTree** edge); + GenTree* StoreNodeToVar(LIR::Use& use); static genTreeOps GetHiOper(genTreeOps oper); static genTreeOps GetLoOper(genTreeOps oper); diff --git a/src/jit/dll/CMakeLists.txt b/src/jit/dll/CMakeLists.txt index 01e58dbbb8..43ed07eae5 100644 --- a/src/jit/dll/CMakeLists.txt +++ b/src/jit/dll/CMakeLists.txt @@ -1,20 +1,20 @@ project(ClrJit) -if(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM) +if(CLR_CMAKE_TARGET_ARCH_ARM) add_definitions(-DLEGACY_BACKEND) -endif(CLR_CMAKE_PLATFORM_ARCH_I386 OR 
CLR_CMAKE_PLATFORM_ARCH_ARM) +endif(CLR_CMAKE_TARGET_ARCH_ARM) # Disable the following for UNIX altjit on Windows if(CLR_CMAKE_PLATFORM_UNIX) add_compile_options(-fPIC) - add_library_clr(${JIT_BASE_NAME}_static + add_library_clr(clrjit_static STATIC ${SHARED_LIB_SOURCES} ) - add_dependencies(${JIT_BASE_NAME}_static coreclrpal gcinfo) + add_dependencies(clrjit_static coreclrpal gcinfo) else() - add_library_clr(${JIT_BASE_NAME}_static + add_library_clr(clrjit_static ${SOURCES} ) # Disable up to here (see above) the following for UNIX altjit on Windows diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj index 97981e7eff..7505f5e8ef 100644 --- a/src/jit/dll/jit.nativeproj +++ b/src/jit/dll/jit.nativeproj @@ -37,9 +37,9 @@ <!-- Profile-guided optimization --> - <PogoOptimize Condition="('$(BuildArchitecture)' == 'arm')">false</PogoOptimize> - <PogoInstrument Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument> - <PogoUpdate Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate> + <PogoOptimize Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm')">false</PogoOptimize> + <PogoInstrument Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument> + <PogoUpdate Condition="('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate> <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'amd64'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib</Win32DllLibs> <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'arm'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib;$(SdkLibPath)\ntdll.lib</Win32DllLibs> <OptimizationDataRelativeDir>$(_BuildArch)\CLR\Base</OptimizationDataRelativeDir> diff --git a/src/jit/earlyprop.cpp b/src/jit/earlyprop.cpp index 70d1012aa0..51de631d19 100644 --- a/src/jit/earlyprop.cpp +++ b/src/jit/earlyprop.cpp @@ -189,8 +189,7 @@ void Compiler::optEarlyProp() // Walk the stmt tree in linear order to rewrite any array length reference with a // constant array length. - bool isRewritten = false; - bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0; + bool isRewritten = false; for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext) { if (optEarlyPropRewriteTree(tree)) @@ -238,12 +237,8 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree) objectRefPtr = tree->gtOp.gtOp1; propKind = optPropKind::OPK_ARRAYLEN; } - else if ((tree->OperGet() == GT_IND) && !varTypeIsStruct(tree)) + else if (tree->OperIsIndir()) { - // TODO-1stClassStructs: The above condition should apply equally to all indirections, - // but previously the implicit indirections due to a struct assignment were not - // considered, so we are currently limiting it to non-structs to preserve existing - // behavior. // optFoldNullCheck takes care of updating statement info if a null check is removed. 
optFoldNullCheck(tree); @@ -259,7 +254,7 @@ bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree) return false; } - objectRefPtr = tree->gtOp.gtOp1; + objectRefPtr = tree->AsIndir()->Addr(); propKind = optPropKind::OPK_OBJ_GETTYPE; } else @@ -511,15 +506,23 @@ void Compiler::optFoldNullCheck(GenTreePtr tree) // | // x - assert(tree->OperGet() == GT_IND); - if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR) + if ((compCurBB->bbFlags & BBF_HAS_NULLCHECK) == 0) + { + return; + } + + assert(tree->OperIsIndir()); + + GenTree* const addr = tree->AsIndir()->Addr(); + if (addr->OperGet() == GT_LCL_VAR) { // Check if we have the pattern above and find the nullcheck node if we do. // Find the definition of the indirected local (x in the picture) - GenTreePtr indLocalTree = tree->gtGetOp1(); - unsigned lclNum = indLocalTree->AsLclVarCommon()->GetLclNum(); - unsigned ssaNum = indLocalTree->AsLclVarCommon()->GetSsaNum(); + GenTreeLclVarCommon* const lclVarNode = addr->AsLclVarCommon(); + + const unsigned lclNum = lclVarNode->GetLclNum(); + const unsigned ssaNum = lclVarNode->GetSsaNum(); if (ssaNum != SsaConfig::RESERVED_SSA_NUM) { @@ -557,7 +560,7 @@ void Compiler::optFoldNullCheck(GenTreePtr tree) { // Walk from the use to the def in reverse execution order to see // if any nodes have unsafe side effects. - GenTreePtr currentTree = indLocalTree->gtPrev; + GenTreePtr currentTree = lclVarNode->gtPrev; bool isInsideTry = compCurBB->hasTryIndex(); bool canRemoveNullCheck = true; const unsigned maxNodesWalked = 25; @@ -612,13 +615,8 @@ void Compiler::optFoldNullCheck(GenTreePtr tree) additionNode->gtFlags & (GTF_EXCEPT | GTF_DONT_CSE); // Re-morph the statement. - fgMorphBlockStmt(compCurBB, curStmt DEBUGARG("optFoldNullCheck")); - - // Recalculate the gtCostSz, etc... - gtSetStmtInfo(curStmt); - - // Re-thread the nodes - fgSetStmtSeq(curStmt); + fgMorphBlockStmt(compCurBB, + curStmt->AsStmt() DEBUGARG("optFoldNullCheck")); } } } @@ -668,4 +666,4 @@ bool Compiler::optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry) } } return result; -}
\ No newline at end of file +} diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index 527244221e..dcadaa9453 100755..100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -284,21 +284,17 @@ CorJitResult CILJit::compileMethod( return g_realJitCompiler->compileMethod(compHnd, methodInfo, flags, entryAddress, nativeSizeOfCode); } - CORJIT_FLAGS jitFlags = {0}; + JitFlags jitFlags; - DWORD jitFlagsSize = 0; #if COR_JIT_EE_VERSION > 460 - if (flags == CORJIT_FLG_CALL_GETJITFLAGS) - { - jitFlagsSize = compHnd->getJitFlags(&jitFlags, sizeof(jitFlags)); - } -#endif - - assert(jitFlagsSize <= sizeof(jitFlags)); - if (jitFlagsSize == 0) - { - jitFlags.corJitFlags = flags; - } + assert(flags == CORJIT_FLAGS::CORJIT_FLAG_CALL_GETJITFLAGS); + CORJIT_FLAGS corJitFlags; + DWORD jitFlagsSize = compHnd->getJitFlags(&corJitFlags, sizeof(corJitFlags)); + assert(jitFlagsSize == sizeof(corJitFlags)); + jitFlags.SetFromFlags(corJitFlags); +#else // COR_JIT_EE_VERSION <= 460 + jitFlags.SetFromOldFlags(flags, 0); +#endif // COR_JIT_EE_VERSION <= 460 int result; void* methodCodePtr = nullptr; @@ -385,17 +381,31 @@ void CILJit::getVersionIdentifier(GUID* versionIdentifier) /***************************************************************************** * Determine the maximum length of SIMD vector supported by this JIT. */ + +#if COR_JIT_EE_VERSION > 460 +unsigned CILJit::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) +#else unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags) +#endif { if (g_realJitCompiler != nullptr) { return g_realJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags); } -#ifdef _TARGET_AMD64_ + JitFlags jitFlags; + +#if COR_JIT_EE_VERSION > 460 + jitFlags.SetFromFlags(cpuCompileFlags); +#else // COR_JIT_EE_VERSION <= 460 + jitFlags.SetFromOldFlags(cpuCompileFlags, 0); +#endif // COR_JIT_EE_VERSION <= 460 + +#ifdef FEATURE_SIMD +#ifdef _TARGET_XARCH_ #ifdef FEATURE_AVX_SUPPORT - if (((cpuCompileFlags & CORJIT_FLG_PREJIT) == 0) && ((cpuCompileFlags & CORJIT_FLG_FEATURE_SIMD) != 0) && - ((cpuCompileFlags & CORJIT_FLG_USE_AVX2) != 0)) + if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD) && + jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2)) { if (JitConfig.EnableAVX() != 0) { @@ -404,9 +414,10 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags) } #endif // FEATURE_AVX_SUPPORT return 16; -#else // !_TARGET_AMD64_ +#endif // _TARGET_XARCH_ +#else // !FEATURE_SIMD return 0; -#endif // !_TARGET_AMD64_ +#endif // !FEATURE_SIMD } void CILJit::setRealJit(ICorJitCompiler* realJitCompiler) @@ -1378,7 +1389,7 @@ bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param) * Utility functions */ -#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) +#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(FEATURE_TRACELOGGING) /*****************************************************************************/ @@ -1526,6 +1537,9 @@ const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd) const wchar_t* Compiler::eeGetCPString(size_t strHandle) { +#ifdef FEATURE_PAL + return nullptr; +#else char buff[512 + sizeof(CORINFO_String)]; // make this bulletproof, so it works even if we are wrong. 
@@ -1547,6 +1561,7 @@ const wchar_t* Compiler::eeGetCPString(size_t strHandle) } return (asString->chars); +#endif // FEATURE_PAL } #endif // DEBUG diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp index d9bf95fde8..3899d92192 100644 --- a/src/jit/ee_il_dll.hpp +++ b/src/jit/ee_il_dll.hpp @@ -21,7 +21,11 @@ class CILJit : public ICorJitCompiler void getVersionIdentifier(GUID* versionIdentifier /* OUT */ ); +#if COR_JIT_EE_VERSION > 460 + unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags); +#else unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags); +#endif void setRealJit(ICorJitCompiler* realJitCompiler); }; diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 5c991ddf1b..0929b7392e 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -1264,9 +1264,9 @@ void* emitter::emitAllocInstr(size_t sz, emitAttr opsz) // ARM - This is currently broken on _TARGET_ARM_ // When nopSize is odd we misalign emitCurIGsize // - if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT) && !emitInInstrumentation && - !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog - && !emitIGisInEpilog(emitCurIG) && + if (!emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && !emitInInstrumentation && + !emitIGisInProlog(emitCurIG) && // don't do this in prolog or epilog + !emitIGisInEpilog(emitCurIG) && emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline) ) { @@ -1670,13 +1670,9 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType, emitCurIGsize += MAX_PLACEHOLDER_IG_SIZE; emitCurCodeOffset += emitCurIGsize; -#ifdef DEBUGGING_SUPPORT - #if FEATURE_EH_FUNCLETS // Add the appropriate IP mapping debugging record for this placeholder - // group. - - // genExitCode() adds the mapping for main function epilogs + // group. genExitCode() adds the mapping for main function epilogs. if (emitComp->opts.compDbgInfo) { if (igType == IGPT_FUNCLET_PROLOG) @@ -1690,8 +1686,6 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType, } #endif // FEATURE_EH_FUNCLETS -#endif // DEBUGGING_SUPPORT - /* Start a new IG if more code follows */ if (last) @@ -2320,7 +2314,7 @@ bool emitter::emitNoGChelper(unsigned IHX) case CORINFO_HELP_PROF_FCN_LEAVE: case CORINFO_HELP_PROF_FCN_ENTER: -#ifdef _TARGET_AMD64_ +#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)) case CORINFO_HELP_PROF_FCN_TAILCALL: #endif case CORINFO_HELP_LLSH: @@ -3414,8 +3408,6 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp) #endif -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) - /* Did the size of the instruction match our expectations? */ UNATIVE_OFFSET csz = (UNATIVE_OFFSET)(*dp - curInsAdr); @@ -3447,8 +3439,6 @@ size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp) #endif } -#endif - #ifdef DEBUG /* Make sure the instruction descriptor size also matches our expectations */ if (is != emitSizeOfInsDsc(id)) @@ -6048,7 +6038,7 @@ unsigned char emitter::emitOutputLong(BYTE* dst, ssize_t val) #ifdef DEBUG if (emitComp->opts.dspEmit) { - printf("; emit_long 0%08XH\n", val); + printf("; emit_long 0%08XH\n", (int)val); } #ifdef _TARGET_AMD64_ // if we're emitting code bytes, ensure that we've already emitted the rex prefix! 
@@ -6072,9 +6062,9 @@ unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val) if (emitComp->opts.dspEmit) { #ifdef _TARGET_AMD64_ - printf("; emit_size_t 0%016llXH\n", (size_t)val); + printf("; emit_size_t 0%016llXH\n", val); #else // _TARGET_AMD64_ - printf("; emit_size_t 0%08XH\n", (size_t)val); + printf("; emit_size_t 0%08XH\n", val); #endif // _TARGET_AMD64_ } #endif // DEBUG @@ -6082,6 +6072,60 @@ unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val) return sizeof(size_t); } +//------------------------------------------------------------------------ +// Wrappers to emitOutputByte, emitOutputWord, emitOutputLong, emitOutputSizeT +// that take unsigned __int64 or size_t type instead of ssize_t. Used on RyuJIT/x86. +// +// Arguments: +// dst - passed through +// val - passed through +// +// Return Value: +// Same as wrapped function. +// + +#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) +unsigned char emitter::emitOutputByte(BYTE* dst, size_t val) +{ + return emitOutputByte(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputWord(BYTE* dst, size_t val) +{ + return emitOutputWord(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputLong(BYTE* dst, size_t val) +{ + return emitOutputLong(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputSizeT(BYTE* dst, size_t val) +{ + return emitOutputSizeT(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputByte(BYTE* dst, unsigned __int64 val) +{ + return emitOutputByte(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputWord(BYTE* dst, unsigned __int64 val) +{ + return emitOutputWord(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputLong(BYTE* dst, unsigned __int64 val) +{ + return emitOutputLong(dst, (ssize_t)val); +} + +unsigned char emitter::emitOutputSizeT(BYTE* dst, unsigned __int64 val) +{ + return emitOutputSizeT(dst, (ssize_t)val); +} +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) + /***************************************************************************** * * Given a block cookie and a code position, return the actual code offset; diff --git a/src/jit/emit.h b/src/jit/emit.h index 8fb24bcd60..5b1a395379 100644 --- a/src/jit/emit.h +++ b/src/jit/emit.h @@ -427,6 +427,11 @@ public: // There seem to be some cases where this is used without being initialized via CodeGen::inst_set_SV_var(). 
emitVarRefOffs = 0; #endif // DEBUG + +#ifdef _TARGET_XARCH_ + SetUseSSE3_4(false); +#endif // _TARGET_XARCH_ + #ifdef FEATURE_AVX_SUPPORT SetUseAVX(false); #endif // FEATURE_AVX_SUPPORT @@ -1659,6 +1664,18 @@ private: unsigned char emitOutputLong(BYTE* dst, ssize_t val); unsigned char emitOutputSizeT(BYTE* dst, ssize_t val); +#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) + unsigned char emitOutputByte(BYTE* dst, size_t val); + unsigned char emitOutputWord(BYTE* dst, size_t val); + unsigned char emitOutputLong(BYTE* dst, size_t val); + unsigned char emitOutputSizeT(BYTE* dst, size_t val); + + unsigned char emitOutputByte(BYTE* dst, unsigned __int64 val); + unsigned char emitOutputWord(BYTE* dst, unsigned __int64 val); + unsigned char emitOutputLong(BYTE* dst, unsigned __int64 val); + unsigned char emitOutputSizeT(BYTE* dst, unsigned __int64 val); +#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) + size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -1742,8 +1759,8 @@ private: BYTE* emitCurIGfreeEndp; // one byte past the last available byte in buffer BYTE* emitCurIGfreeBase; // first byte address - unsigned emitCurIGinsCnt; // # of collected instr's in buffer - unsigned emitCurIGsize; // estimated code size of current group in bytes + unsigned emitCurIGinsCnt; // # of collected instr's in buffer + unsigned emitCurIGsize; // estimated code size of current group in bytes UNATIVE_OFFSET emitCurCodeOffset; // current code offset within group UNATIVE_OFFSET emitTotalCodeSize; // bytes of code in entire method @@ -1822,8 +1839,12 @@ private: void emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig); void emitNewIG(); + +#if !defined(JIT32_GCENCODER) void emitDisableGC(); void emitEnableGC(); +#endif // !defined(JIT32_GCENCODER) + void emitGenIG(insGroup* ig); insGroup* emitSavIG(bool emitAdd = false); void emitNxtIG(bool emitAdd = false); @@ -2707,6 +2728,7 @@ inline void emitter::emitNewIG() emitGenIG(ig); } +#if !defined(JIT32_GCENCODER) // Start a new instruction group that is not interruptable inline void emitter::emitDisableGC() { @@ -2736,6 +2758,7 @@ inline void emitter::emitEnableGC() // instruction groups. emitForceNewIG = true; } +#endif // !defined(JIT32_GCENCODER) /*****************************************************************************/ #endif // _EMIT_H_ diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index 1f57048a80..45928ca2d2 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -4368,6 +4368,7 @@ void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNu * * EC_FUNC_TOKEN : addr is the method address * EC_FUNC_ADDR : addr is the absolute address of the function + * if addr is NULL, it is a recursive call * * If callType is one of these emitCallTypes, addr has to be NULL. * EC_INDIR_R : "call ireg". 
@@ -4463,13 +4464,11 @@ void emitter::emitIns_Call(EmitCallType callType, assert(argSize % (int)sizeof(void*) == 0); argCnt = argSize / (int)sizeof(void*); -#ifdef DEBUGGING_SUPPORT /* Managed RetVal: emit sequence point for the call */ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET) { codeGen->genIPmappingAdd(ilOffset, false); } -#endif /* We need to allocate the appropriate instruction descriptor based @@ -4555,8 +4554,8 @@ void emitter::emitIns_Call(EmitCallType callType, assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR); - assert(addr != NULL); - assert(codeGen->validImmForBL((ssize_t)addr)); + // if addr is nullptr then this call is treated as a recursive call. + assert(addr == nullptr || codeGen->arm_Valid_Imm_For_BL((ssize_t)addr)); if (isJump) { @@ -5266,8 +5265,8 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) else #endif { - assert(distVal >= -16777216); - assert(distVal <= 16777214); + assert(distVal >= CALL_DIST_MAX_NEG); + assert(distVal <= CALL_DIST_MAX_POS); if (distVal < 0) code |= 1 << 26; @@ -6211,7 +6210,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = sizeof(instrDesc); } - addr = id->idAddr()->iiaAddr; + if (id->idAddr()->iiaAddr == NULL) /* a recursive call */ + { + addr = emitCodeBlock; + } + else + { + addr = id->idAddr()->iiaAddr; + } code = emitInsCode(ins, fmt); #ifdef RELOC_SUPPORT diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index a632ec12c8..12c4087115 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -6738,13 +6738,11 @@ void emitter::emitIns_Call(EmitCallType callType, assert(argSize % REGSIZE_BYTES == 0); argCnt = (int)(argSize / (int)sizeof(void*)); -#ifdef DEBUGGING_SUPPORT /* Managed RetVal: emit sequence point for the call */ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET) { codeGen->genIPmappingAdd(ilOffset, false); } -#endif /* We need to allocate the appropriate instruction descriptor based diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index d43f766ee8..b6bacfa520 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -30,6 +30,15 @@ bool IsSSE2Instruction(instruction ins) return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION); } +bool IsSSE4Instruction(instruction ins) +{ +#ifdef LEGACY_BACKEND + return false; +#else + return (ins >= INS_FIRST_SSE4_INSTRUCTION && ins <= INS_LAST_SSE4_INSTRUCTION); +#endif +} + bool IsSSEOrAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT @@ -48,7 +57,9 @@ bool emitter::IsAVXInstruction(instruction ins) #endif } +#ifdef _TARGET_AMD64_ #define REX_PREFIX_MASK 0xFF00000000LL +#endif // _TARGET_AMD64_ #ifdef FEATURE_AVX_SUPPORT // Returns true if the AVX instruction is a binary operator that requires 3 operands. 
@@ -75,10 +86,8 @@ bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins) ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb || ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw || ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub || - ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 || - ins == INS_punpckldq - - ); + ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || + ins == INS_vinsertf128 || ins == INS_punpckldq || ins == INS_phaddd); } // Returns true if the AVX instruction is a move operator that requires 3 operands. @@ -92,22 +101,45 @@ bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins) return IsAVXInstruction(ins) && (ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss); } -#endif // FEATURE_AVX_SUPPORT -// Returns true if the AVX instruction is a 4-byte opcode. +// ------------------------------------------------------------------------------ +// Is4ByteAVXInstruction: Returns true if the AVX instruction is a 4-byte opcode. +// +// Arguments: +// ins - instructions +// // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro. +// // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this // needs to be addressed by expanding instruction encodings. -bool Is4ByteAVXInstruction(instruction ins) +bool emitter::Is4ByteAVXInstruction(instruction ins) { -#ifdef FEATURE_AVX_SUPPORT - return (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || + return UseAVX() && + (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw || ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 || - ins == INS_pmulld); -#else + ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd); +} +#endif // FEATURE_AVX_SUPPORT + +// ------------------------------------------------------------------- +// Is4ByteSSE4Instruction: Returns true if the SSE4 instruction +// is a 4-byte opcode. +// +// Arguments: +// ins - instruction +// +// Note that this should be true for any of the instructions in instrsXArch.h +// that use the SSE38 or SSE3A macro. +bool emitter::Is4ByteSSE4Instruction(instruction ins) +{ +#ifdef LEGACY_BACKEND + // On legacy backend SSE3_4 is not enabled. return false; +#else + return UseSSE3_4() && (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || + ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd); #endif } @@ -150,8 +182,9 @@ bool emitter::TakesVexPrefix(instruction ins) // prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar // and AVX-128 bit operations. 
#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL -#define LBIT_IN_3BYTE_VEX_PREFIX 0X00000400000000ULL -size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) +#define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL +#define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL +emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr) { // Only AVX instructions require VEX prefix assert(IsAVXInstruction(ins)); @@ -160,6 +193,7 @@ size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) assert(!hasVexPrefix(code)); // Set L bit to 1 in case of instructions that operate on 256-bits. + assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0); code |= DEFAULT_3BYTE_VEX_PREFIX; if (attr == EA_32BYTE) { @@ -296,25 +330,25 @@ bool IsXMMReg(regNumber reg) } // Returns bits to be encoded in instruction for the given register. -regNumber RegEncoding(regNumber reg) +unsigned RegEncoding(regNumber reg) { #ifndef LEGACY_BACKEND // XMM registers do not share the same reg numbers as integer registers. // But register encoding of integer and XMM registers is the same. // Therefore, subtract XMMBASE from regNumber to get the register encoding // in case of XMM registers. - return (regNumber)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7); + return (unsigned)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7); #else // LEGACY_BACKEND // Legacy X86: XMM registers share the same reg numbers as integer registers and // hence nothing to do to get reg encoding. - return (regNumber)(reg & 0x7); + return (unsigned)(reg & 0x7); #endif // LEGACY_BACKEND } // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes // SSE2: separate 1-byte prefix gets added before opcode. // AVX: specific bits within VEX prefix need to be set in bit-inverted form. -size_t emitter::AddRexWPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code) { #ifdef _TARGET_AMD64_ if (UseAVX() && IsAVXInstruction(ins)) @@ -335,7 +369,7 @@ size_t emitter::AddRexWPrefix(instruction ins, size_t code) #ifdef _TARGET_AMD64_ -size_t emitter::AddRexRPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -349,7 +383,7 @@ size_t emitter::AddRexRPrefix(instruction ins, size_t code) return code | 0x4400000000ULL; } -size_t emitter::AddRexXPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -363,7 +397,7 @@ size_t emitter::AddRexXPrefix(instruction ins, size_t code) return code | 0x4200000000ULL; } -size_t emitter::AddRexBPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code) { if (UseAVX() && IsAVXInstruction(ins)) { @@ -378,12 +412,14 @@ size_t emitter::AddRexBPrefix(instruction ins, size_t code) } // Adds REX prefix (0x40) without W, R, X or B bits set -size_t emitter::AddRexPrefix(instruction ins, size_t code) +emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) { assert(!UseAVX() || !IsAVXInstruction(ins)); return code | 0x4000000000ULL; } +#endif //_TARGET_AMD64_ + bool isPrefix(BYTE b) { assert(b != 0); // Caller should check this @@ -401,17 +437,15 @@ bool isPrefix(BYTE b) return ((b == 0xF2) || (b == 0xF3) || (b == 0x66)); } -#endif //_TARGET_AMD64_ - // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. 
-unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code) +unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) { -#ifdef _TARGET_AMD64_ // TODO-x86: This needs to be enabled for AVX support on x86. +#ifdef FEATURE_AVX_SUPPORT if (hasVexPrefix(code)) { // Only AVX instructions should have a VEX prefix assert(UseAVX() && IsAVXInstruction(ins)); - size_t vexPrefix = (code >> 32) & 0x00FFFFFF; + code_t vexPrefix = (code >> 32) & 0x00FFFFFF; code &= 0x00000000FFFFFFFFLL; WORD leadingBytes = 0; @@ -504,7 +538,10 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s emitOutputByte(dst + 2, vexPrefix & 0xFF); return 3; } - else if (code > 0x00FFFFFFFFLL) +#endif // FEATURE_AVX_SUPPORT + +#ifdef _TARGET_AMD64_ + if (code > 0x00FFFFFFFFLL) { BYTE prefix = (code >> 32) & 0xFF; noway_assert(prefix >= 0x40 && prefix <= 0x4F); @@ -543,13 +580,13 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, s { // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX // Change to c2rrc1XXXX, and emit check2 now - code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL)); + code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL)); } else { // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode) // Change to c2XXrrXXXX, and emit check2 now - code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL)); + code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL)); } return emitOutputByte(dst, check2); } @@ -593,7 +630,6 @@ void emitter::emitOutputPreEpilogNOP() // Size of rex prefix in bytes unsigned emitter::emitGetRexPrefixSize(instruction ins) { - // In case of AVX instructions, REX prefixes are part of VEX prefix. // And hence requires no additional byte to encode REX prefixes. if (IsAVXInstruction(ins)) @@ -630,7 +666,7 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) //=(opcodeSize - ExtrabytesSize) + vexPrefixSize //=opcodeSize + (vexPrefixSize - ExtrabytesSize) //=opcodeSize + vexPrefixAdjustedSize -unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code) +unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code) { #ifdef FEATURE_AVX_SUPPORT if (IsAVXInstruction(ins)) @@ -674,19 +710,19 @@ unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, s } // Get size of rex or vex prefix emitted in code -unsigned emitter::emitGetPrefixSize(size_t code) +unsigned emitter::emitGetPrefixSize(code_t code) { -#ifdef FEATURE_AVX_SUPPORT - if (code & VEX_PREFIX_MASK_3BYTE) + if (hasVexPrefix(code)) { return 3; } - else -#endif - if (code & REX_PREFIX_MASK) + +#ifdef _TARGET_AMD64_ + if (code & REX_PREFIX_MASK) { return 1; } +#endif // _TARGET_AMD64_ return 0; } @@ -1058,7 +1094,7 @@ size_t insCodesMR[] = // clang-format on // Returns true iff the give CPU instruction has an MR encoding. -inline size_t hasCodeMR(instruction ins) +inline bool hasCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0])); return ((insCodesMR[ins] != BAD_CODE)); @@ -1083,7 +1119,7 @@ inline size_t insCodeMR(instruction ins) * part of an opcode. 
*/ -inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code) +inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code) { assert(reg < REG_STK); @@ -1106,16 +1142,16 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt } #endif // _TARGET_AMD64_ - reg = RegEncoding(reg); - assert(reg < 8); - return reg; + unsigned regBits = RegEncoding(reg); #else // LEGACY_BACKEND - assert(reg < 8); - return reg; + unsigned regBits = reg; #endif // LEGACY_BACKEND + + assert(regBits < 8); + return regBits; } /***************************************************************************** @@ -1124,7 +1160,7 @@ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAtt * part of an opcode. */ -inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code) +inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code) { assert(reg < REG_STK); @@ -1147,14 +1183,16 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt } #endif // _TARGET_AMD64_ - reg = RegEncoding(reg); - assert(reg < 8); - return (reg << 3); + unsigned regBits = RegEncoding(reg); + +#else // LEGACY_BACKEND + + unsigned regBits = reg; -#else // LEGACY_BACKEND - assert(reg < 8); - return (reg << 3); #endif // LEGACY_BACKEND + + assert(regBits < 8); + return (regBits << 3); } /*********************************************************************************** @@ -1162,7 +1200,7 @@ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAtt * Returns modified AVX opcode with the specified register encoded in bits 3-6 of * byte 2 of VEX prefix. */ -inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code) { #ifdef FEATURE_AVX_SUPPORT assert(reg < REG_STK); @@ -1172,7 +1210,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr // Get 4-bit register encoding // RegEncoding() gives lower 3 bits // IsExtendedReg() gives MSB. - size_t regBits = RegEncoding(reg); + code_t regBits = RegEncoding(reg); if (IsExtendedReg(reg)) { regBits |= 0x08; @@ -1196,7 +1234,7 @@ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr * Used exclusively to generate the REX.X bit and truncate the register. */ -inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code) +inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, code_t* code) { assert(reg < REG_STK); @@ -1210,11 +1248,13 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* { *code = AddRexXPrefix(ins, *code); // REX.X } - reg = RegEncoding(reg); -#endif + unsigned regBits = RegEncoding(reg); +#else // !_TARGET_AMD64_ + unsigned regBits = reg; +#endif // !_TARGET_AMD64_ - assert(reg < 8); - return reg; + assert(regBits < 8); + return regBits; } /***************************************************************************** @@ -1222,7 +1262,7 @@ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* * Returns the "[r/m]" opcode with the mod/RM field set to register. 
*/ -inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) +inline emitter::code_t emitter::insEncodeMRreg(instruction ins, code_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. @@ -1237,22 +1277,10 @@ inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) /***************************************************************************** * - * Returns the "[r/m], icon" opcode with the mod/RM field set to register. - */ - -inline size_t insEncodeMIreg(instruction ins, size_t code) -{ - assert((code & 0xC000) == 0); - code |= 0xC000; - return code; -} - -/***************************************************************************** - * * Returns the given "[r/m]" opcode with the mod/RM field set to register. */ -inline size_t insEncodeRMreg(instruction ins, size_t code) +inline emitter::code_t emitter::insEncodeRMreg(instruction ins, code_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. @@ -1270,7 +1298,7 @@ inline size_t insEncodeRMreg(instruction ins, size_t code) * the given register. */ -inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code) { assert((code & 0xC000) == 0); code |= 0xC000; @@ -1285,7 +1313,7 @@ inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr s * the given register. */ -inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code) +inline emitter::code_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code) { assert((code & 0xC000) == 0); code |= 0xC000; @@ -1310,12 +1338,12 @@ inline bool insNeedsRRIb(instruction ins) * Returns the "reg,reg,imm8" opcode with both the reg's set to the * the given register. */ -inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) +inline emitter::code_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) { assert(size == EA_4BYTE); // All we handle for now. assert(insNeedsRRIb(ins)); // If this list gets longer, use a switch, or a table lookup. - size_t code = 0x69c0; + code_t code = 0x69c0; unsigned regcode = insEncodeReg012(ins, reg, size, &code); // We use the same register as source and destination. (Could have another version that does both regs...) code |= regcode; @@ -1329,9 +1357,9 @@ inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr si * nibble of the opcode */ -inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) +inline emitter::code_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) { - size_t code = insCodeRR(ins); + code_t code = insCodeRR(ins); unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; return code; @@ -1342,7 +1370,7 @@ inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr s * Return the 'SS' field value for the given index scale factor. */ -inline unsigned insSSval(unsigned scale) +inline unsigned emitter::insSSval(unsigned scale) { assert(scale == 1 || scale == 2 || scale == 4 || scale == 8); @@ -1447,7 +1475,7 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1 * Estimate the size (in bytes of generated code) of the given instruction. 
*/ -inline UNATIVE_OFFSET emitter::emitInsSize(size_t code) +inline UNATIVE_OFFSET emitter::emitInsSize(code_t code) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef _TARGET_AMD64_ @@ -1466,18 +1494,17 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re emitAttr size = EA_SIZE(attr); UNATIVE_OFFSET sz; -#ifdef _TARGET_AMD64_ - // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes. + + // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. // This would probably be better expressed as a different format or something? - if (insCodeRM(ins) & 0xFF00) + if ((insCodeRM(ins) & 0xFF00) != 0) { sz = 5; } else -#endif // _TARGET_AMD64_ { - size_t code = insCodeRM(ins); + code_t code = insCodeRM(ins); sz = emitInsSize(insEncodeRMreg(ins, code)); } @@ -1502,7 +1529,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re /*****************************************************************************/ -inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp) +inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp) { UNATIVE_OFFSET size = emitInsSize(code); UNATIVE_OFFSET offs; @@ -1777,7 +1804,7 @@ static bool baseRegisterRequiresDisplacement(regNumber base) #endif } -UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) +UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) { emitAttr attrSize = id->idOpSize(); instruction ins = id->idIns(); @@ -1994,7 +2021,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) return size; } -inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val) +inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); @@ -2027,7 +2054,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val return valSize + emitInsSizeAM(id, code); } -inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) +inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) { instruction ins = id->idIns(); @@ -2047,7 +2074,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) return size + emitInsSize(code); } -inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val) +inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); @@ -2252,7 +2279,7 @@ void emitter::emitIns(instruction ins) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(); - size_t code = insCodeMR(ins); + code_t code = insCodeMR(ins); #ifdef DEBUG #if FEATURE_STACK_FP_X87 @@ -2328,7 +2355,7 @@ void emitter::emitIns(instruction ins, emitAttr attr) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); - size_t code = insCodeMR(ins); + code_t code = insCodeMR(ins); assert(ins == INS_cdq); assert((code & 0xFFFFFF00) == 0); sz = 1; @@ -2499,8 +2526,9 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, // Absolute addresses marked as contained should fit within the base of addr mode. 
assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); - // Either not generating relocatable code or addr must be an icon handle - assert(!emitComp->opts.compReloc || memBase->IsIconHandle()); + // Either not generating relocatable code, or addr must be an icon handle, or the + // constant is zero (which we won't generate a relocation for). + assert(!emitComp->opts.compReloc || memBase->IsIconHandle() || memBase->IsIntegralConst(0)); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { @@ -2904,6 +2932,19 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G varNum = tmpDsc->tdTempNum(); offset = 0; } + else + { + // At this point we must have a memory operand that is a contained indir: if we do not, we should have handled + // this instruction above in the reg/imm or reg/reg case. + assert(mem != nullptr); + assert(memBase != nullptr); + + if (memBase->OperGet() == GT_LCL_VAR_ADDR) + { + varNum = memBase->AsLclVarCommon()->GetLclNum(); + offset = 0; + } + } // Spill temp numbers are negative and start with -1 // which also happens to be BAD_VAR_NUM. For this reason @@ -2911,7 +2952,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G if (varNum != BAD_VAR_NUM || tmpDsc != nullptr) { // Is the memory op in the source position? - if (src->isContainedLclField() || src->isContainedLclVar() || src->isContainedSpillTemp()) + if (src->isContainedMemoryOp()) { if (instrHasImplicitRegPairDest(ins)) { @@ -3351,22 +3392,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -3484,7 +3510,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t sz += emitGetRexPrefixSize(ins); } -#ifdef _TARGET_X86_ +#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND) assert(reg < 8); #endif @@ -3504,34 +3530,10 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - if (reg == REG_ESP) { - if (emitCntStackDepth) - { - if (ins == INS_sub) - { - S_UINT32 newStackLvl(emitCurStackLvl); - newStackLvl += S_UINT32(val); - noway_assert(!newStackLvl.IsOverflow()); - - emitCurStackLvl = newStackLvl.Value(); - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_add) - { - S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); - noway_assert(!newStackLvl.IsOverflow()); - - emitCurStackLvl = newStackLvl.Value(); - } - } + emitAdjustStackDepth(ins, val); } - -#endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** @@ -3584,17 +3586,7 @@ void emitter::emitIns_I(instruction ins, emitAttr attr, int val) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } 
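// [Illustrative sketch; editor addition, not part of the commit being shown.]
// The '+' lines above collapse the repeated "#if !FEATURE_FIXED_OUT_ARGS" push/pop
// bookkeeping into a single helper, so each emitIns_* site reduces to
// "dispIns(id); emitCurIGsize += sz; emitAdjustStackDepthPushPop(ins);".
// A minimal standalone model of that helper's behavior, using plain fields in
// place of emitCurStackLvl / emitMaxStackDepth / emitCntStackDepth (names and
// types here are simplified assumptions, not the emitter's real members):

#include <cassert>

struct StackDepthSketch
{
    unsigned curLvl   = 0; // models emitCurStackLvl
    unsigned maxDepth = 0; // models emitMaxStackDepth
    unsigned cntDepth = 4; // models emitCntStackDepth: 0 in prolog/epilog, one DWORD elsewhere

    void adjustPushPop(bool isPush)
    {
        if (isPush)
        {
            curLvl += cntDepth;
            if (maxDepth < curLvl)
            {
                maxDepth = curLvl; // track the high-water mark
            }
        }
        else // pop
        {
            assert(curLvl >= cntDepth); // mirrors the "(int)emitCurStackLvl >= 0" assert
            curLvl -= cntDepth;
        }
    }
};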
/***************************************************************************** @@ -3693,22 +3685,7 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -3757,11 +3734,14 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { - // SSE2 version requires 5 bytes and AVX version 6 bytes + // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes UNATIVE_OFFSET sz = 4; if (IsSSEOrAVXInstruction(ins)) { - sz = UseAVX() ? 6 : 5; + // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate + // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate + // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate + sz = (UseAVX() || UseSSE3_4()) ? 6 : 5; } #ifdef _TARGET_AMD64_ @@ -4014,7 +3994,7 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f id->idIns(ins); id->idInsFmt(fmt); - size_t code = insCodeMI(ins); + code_t code = insCodeMI(ins); UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); #ifdef _TARGET_AMD64_ @@ -4387,22 +4367,7 @@ void emitter::emitIns_AR_R( dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) @@ -4443,22 +4408,7 @@ void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp) @@ -4575,22 +4525,7 @@ void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regN dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_I_ARX( @@ -4711,22 +4646,7 @@ void emitter::emitIns_ARX_R( dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < 
emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp) @@ -4842,22 +4762,7 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } /***************************************************************************** @@ -4901,22 +4806,7 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) dispIns(id); emitCurIGsize += sz; -#if !FEATURE_FIXED_OUT_ARGS - - if (ins == INS_push) - { - emitCurStackLvl += emitCntStackDepth; - - if (emitMaxStackDepth < emitCurStackLvl) - emitMaxStackDepth = emitCurStackLvl; - } - else if (ins == INS_pop) - { - emitCurStackLvl -= emitCntStackDepth; - assert((int)emitCurStackLvl >= 0); - } - -#endif // !FEATURE_FIXED_OUT_ARGS + emitAdjustStackDepthPushPop(ins); } void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) @@ -5197,8 +5087,23 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 dispIns(id); emitCurIGsize += sz; + emitAdjustStackDepthPushPop(ins); +} + #if !FEATURE_FIXED_OUT_ARGS +//------------------------------------------------------------------------ +// emitAdjustStackDepthPushPop: Adjust the current and maximum stack depth. +// +// Arguments: +// ins - the instruction. Only INS_push and INS_pop adjust the stack depth. +// +// Notes: +// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. +// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) +// +void emitter::emitAdjustStackDepthPushPop(instruction ins) +{ if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; @@ -5206,10 +5111,53 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } + else if (ins == INS_pop) + { + emitCurStackLvl -= emitCntStackDepth; + assert((int)emitCurStackLvl >= 0); + } +} -#endif // !FEATURE_FIXED_OUT_ARGS +//------------------------------------------------------------------------ +// emitAdjustStackDepth: Adjust the current and maximum stack depth. +// +// Arguments: +// ins - the instruction. Only INS_add and INS_sub adjust the stack depth. +// It is assumed that the add/sub is on the stack pointer. +// val - the number of bytes to add to or subtract from the stack pointer. +// +// Notes: +// 1. Alters emitCurStackLvl and possibly emitMaxStackDepth. +// 2. emitCntStackDepth must be set (0 in prolog/epilog, one DWORD elsewhere) +// +void emitter::emitAdjustStackDepth(instruction ins, ssize_t val) +{ + // If we're in the prolog or epilog, or otherwise not tracking the stack depth, just return. 
+ if (emitCntStackDepth == 0) + return; + + if (ins == INS_sub) + { + S_UINT32 newStackLvl(emitCurStackLvl); + newStackLvl += S_UINT32(val); + noway_assert(!newStackLvl.IsOverflow()); + + emitCurStackLvl = newStackLvl.Value(); + + if (emitMaxStackDepth < emitCurStackLvl) + emitMaxStackDepth = emitCurStackLvl; + } + else if (ins == INS_add) + { + S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); + noway_assert(!newStackLvl.IsOverflow()); + + emitCurStackLvl = newStackLvl.Value(); + } } +#endif // EMIT_TRACK_STACK_DEPTH + /***************************************************************************** * * Add a call instruction (direct or indirect). @@ -5393,13 +5341,11 @@ void emitter::emitIns_Call(EmitCallType callType, assert(argSize % sizeof(void*) == 0); argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide -#ifdef DEBUGGING_SUPPORT /* Managed RetVal: emit sequence point for the call */ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET) { codeGen->genIPmappingAdd(ilOffset, false); } -#endif /* We need to allocate the appropriate instruction descriptor based @@ -5793,9 +5739,18 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) return emitXMMregName(reg); case EA_8BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + { + return emitXMMregName(reg); + } break; case EA_4BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM15)) + { + return emitXMMregName(reg); + } + if (reg > REG_R15) { break; @@ -5880,10 +5835,24 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) case EA_16BYTE: return emitXMMregName(reg); -#endif // LEGACY_BACKEND + case EA_8BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + { + return emitXMMregName(reg); + } + break; + + case EA_4BYTE: + if ((REG_XMM0 <= reg) && (reg <= REG_XMM7)) + { + return emitXMMregName(reg); + } + break; +#else // LEGACY_BACKEND case EA_4BYTE: break; +#endif // LEGACY_BACKEND case EA_2BYTE: rn++; @@ -6661,9 +6630,9 @@ void emitter::emitDispIns( printf(" %-9s", sstr); } #ifndef FEATURE_PAL - if (strnlen_s(sstr, 10) > 8) + if (strnlen_s(sstr, 10) >= 8) #else // FEATURE_PAL - if (strnlen(sstr, 10) > 8) + if (strnlen(sstr, 10) >= 8) #endif // FEATURE_PAL { printf(" "); @@ -6808,17 +6777,8 @@ void emitter::emitDispIns( case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else #ifdef _TARGET_AMD64_ - if (ins == INS_movsxd) + if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } @@ -6841,18 +6801,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispAddrMode(id); - if (IsAVXInstruction(ins)) - { - printf(", %s", emitYMMregName((unsigned)id->idReg1())); - } - else if (IsSSE2Instruction(ins)) - { - printf(", %s", emitXMMregName((unsigned)id->idReg1())); - } - else - { - printf(", %s", emitRegName(id->idReg1(), attr)); - } + printf(", %s", emitRegName(id->idReg1(), attr)); break; case IF_ARD_CNS: @@ -6930,18 +6879,7 @@ void emitter::emitDispIns( emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); - if (IsAVXInstruction(ins)) - { - printf(", %s", emitYMMregName((unsigned)id->idReg1())); - } - else if (IsSSE2Instruction(ins)) - { - printf(", %s", emitXMMregName((unsigned)id->idReg1())); - } - else - { - printf(", %s", 
emitRegName(id->idReg1(), attr)); - } + printf(", %s", emitRegName(id->idReg1(), attr)); break; case IF_SRD_CNS: @@ -6983,17 +6921,8 @@ void emitter::emitDispIns( case IF_RRD_SRD: case IF_RWR_SRD: case IF_RRW_SRD: - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else #ifdef _TARGET_AMD64_ - if (ins == INS_movsxd) + if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } @@ -7016,36 +6945,31 @@ void emitter::emitDispIns( case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: - if (ins == INS_mov_i2xmm) { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr)); + printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); } else if (ins == INS_mov_xmm2i) { - printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1())); + printf("%s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg1(), EA_16BYTE)); + } + else if (ins == INS_pmovmskb) + { + printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); } #ifndef LEGACY_BACKEND else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd)) { - printf(" %s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr)); + printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); } #endif else if ((ins == INS_cvttsd2si) #ifndef LEGACY_BACKEND || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si) #endif - ) - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2())); - } - else if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2())); - } - else if (IsSSE2Instruction(ins)) + || 0) { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2())); + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); } #ifdef _TARGET_AMD64_ else if (ins == INS_movsxd) @@ -7079,16 +7003,8 @@ void emitter::emitDispIns( break; #endif case IF_RRW_RRW_CNS: - if (IsAVXInstruction(ins)) - { - printf("%s,", emitYMMregName((unsigned)id->idReg1()), attr); - printf(" %s", emitYMMregName((unsigned)id->idReg2()), attr); - } - else - { - printf("%s,", emitRegName(id->idReg1(), attr)); - printf(" %s", emitRegName(id->idReg2(), attr)); - } + printf("%s,", emitRegName(id->idReg1(), attr)); + printf(" %s", emitRegName(id->idReg2(), attr)); val = emitGetInsSC(id); #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! @@ -7133,18 +7049,7 @@ void emitter::emitDispIns( attr = EA_PTRSIZE; } #endif - if (IsAVXInstruction(ins)) - { - printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); - } - else if (IsSSE2Instruction(ins)) - { - printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); - } - else - { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); - } + printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; @@ -7521,7 +7426,7 @@ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) * Output an instruction involving an address mode. 
*/ -BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { regNumber reg; regNumber rgx; @@ -7543,7 +7448,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case: call via a register if (id->idIsCallRegPtr()) { - size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); + code_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode); dst += emitOutputWord(dst, opcode); @@ -7559,13 +7464,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); - reg = RegEncoding(reg); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + reg = (regNumber)RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); - rgx = RegEncoding(rgx); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + rgx = (regNumber)RegEncoding(rgx); } // And emit the REX prefix @@ -7605,7 +7512,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // For this format, moves do not support a third operand, so we only need to handle the binary ops. if (IsThreeOperandBinaryAVXInstruction(ins)) { - // Encode source operand reg in 'vvvv' bits in 1's compliement form + // Encode source operand reg in 'vvvv' bits in 1's complement form // The order of operands are reversed, therefore use reg2 as the source. code = insEncodeReg3456(ins, id->idReg1(), size, code); } @@ -7619,13 +7526,15 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); - reg = RegEncoding(reg); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + reg = (regNumber)RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); - rgx = RegEncoding(rgx); + // TODO-Cleanup: stop casting RegEncoding() back to a regNumber. + rgx = (regNumber)RegEncoding(rgx); } // Is this a 'big' opcode? @@ -8185,7 +8094,7 @@ DONE: * Output an instruction involving a stack frame value. */ -BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { int adr; int dsp; @@ -8234,7 +8143,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { - size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte @@ -8581,7 +8490,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) * Output an instruction with a static data member (class variable). 
*/ -BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) +BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { BYTE* addr; CORINFO_FIELD_HANDLE fldh; @@ -8646,20 +8555,18 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) { case IF_RWR_MRD: - assert((unsigned)code == - (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); + assert(code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); - code &= ~((size_t)0xFFFFFFFF); + code &= ~((code_t)0xFFFFFFFF); code |= 0xA0; isMoffset = true; break; case IF_MWR_RRD: - assert((unsigned)code == - (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); + assert(code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); - code &= ~((size_t)0xFFFFFFFF); + code &= ~((code_t)0xFFFFFFFF); code |= 0xA2; isMoffset = true; break; @@ -8674,7 +8581,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { - size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte @@ -9017,7 +8924,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); regNumber reg = id->idReg1(); @@ -9228,7 +9135,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); regNumber reg1 = id->idReg1(); @@ -9238,7 +9145,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Get the 'base' opcode code = insCodeRM(ins); code = AddVexPrefixIfNeeded(ins, code, size); - if (IsSSE2Instruction(ins) || IsAVXInstruction(ins)) + if (IsSSEOrAVXInstruction(ins)) { code = insEncodeRMreg(ins, code); @@ -9322,12 +9229,12 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // now we use the single source as source1 and source2. if (IsThreeOperandBinaryAVXInstruction(ins)) { - // encode source/dest operand reg in 'vvvv' bits in 1's compliement form + // encode source/dest operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg1, size, code); } else if (IsThreeOperandMoveAVXInstruction(ins)) { - // encode source operand reg in 'vvvv' bits in 1's compliement form + // encode source operand reg in 'vvvv' bits in 1's complement form code = insEncodeReg3456(ins, reg2, size, code); } @@ -9340,6 +9247,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Output the highest word of the opcode dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; + + if (Is4ByteSSE4Instruction(ins)) + { + // Output 3rd byte of the opcode + dst += emitOutputByte(dst, code); + code &= 0xFF00; + } } else if (code & 0x00FF0000) { @@ -9349,13 +9263,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte // encoding. Otherwise, this is an instruction with a 4-byte encoding, - // and the MOd/RM encoding needs to go in the 5th byte. + // and the Mod/RM encoding needs to go in the 5th byte. // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte. 
// We probably need a different mechanism to identify the 4-byte encodings. if ((code & 0xFF) == 0x00) { - // This case happens for AVX instructions only - assert(IsAVXInstruction(ins)); + // This case happens for SSE4/AVX instructions only + assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins)); if ((code & 0xFF00) == 0xC000) { dst += emitOutputByte(dst, (0xC0 | regCode)); @@ -9560,7 +9474,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) #ifdef FEATURE_AVX_SUPPORT BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); assert(IsAVXInstruction(ins)); @@ -9642,7 +9556,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; emitAttr size = id->idOpSize(); instruction ins = id->idIns(); regNumber reg = id->idReg1(); @@ -10004,7 +9918,7 @@ DONE: BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) { - size_t code; + code_t code; instruction ins = id->idIns(); emitAttr size = id->idOpSize(); ssize_t val = emitGetInsSC(id); @@ -10286,27 +10200,29 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) } else { - size_t code; + code_t code; // Long jump if (jmp) { + // clang-format off assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp); - assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo); - assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb); + assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo); + assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb); assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae); - assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je); + assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je); assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne); assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe); - assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja); - assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js); + assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja); + assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js); assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns); assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe); assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo); - assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl); + assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl); assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge); assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle); - assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg); + assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg); + // clang-format on code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp))); } @@ -10452,10 +10368,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // What instruction format have we got? switch (id->idInsFmt()) { - size_t code; - size_t regcode; - int args; - CnsVal cnsVal; + code_t code; + unsigned regcode; + int args; + CnsVal cnsVal; BYTE* addr; bool recCall; @@ -10792,6 +10708,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); + + // Update GC info. + assert(!id->idGCref()); + emitGCregDeadUpd(id->idReg1(), dst); break; case IF_RRD_RRD: @@ -10871,7 +10791,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (UseAVX() && Is4ByteAVXInstruction(ins)) + if (Is4ByteAVXInstruction(ins)) { // We just need to output the last byte of the opcode. 
assert((code & 0xFF) == 0); @@ -10883,6 +10803,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; + + if (Is4ByteSSE4Instruction(ins)) + { + dst += emitOutputWord(dst, code); + code = 0; + } } else if (code & 0x00FF0000) { @@ -10898,9 +10824,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } else { - // This case occurs for AVX instructions. + // This case occurs for SSE4/AVX instructions. // Note that regcode is left shifted by 8-bits. - assert(Is4ByteAVXInstruction(ins)); + assert(Is4ByteAVXInstruction(ins) || Is4ByteSSE4Instruction(ins)); dst += emitOutputByte(dst, 0xC0 | (regcode >> 8)); } diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index dfd7e6ec50..98256cdaa7 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -28,6 +28,15 @@ inline static bool isDoubleReg(regNumber reg) /* Routines that compute the size of / encode instructions */ /************************************************************************/ +// code_t is a type used to accumulate bits of opcode + prefixes. On amd64, it must be 64 bits +// to support the REX prefixes. On both x86 and amd64, it must be 64 bits to support AVX, with +// its 3-byte VEX prefix. For legacy backend (which doesn't support AVX), leave it as size_t. +#if defined(LEGACY_BACKEND) +typedef size_t code_t; +#else // !defined(LEGACY_BACKEND) +typedef unsigned __int64 code_t; +#endif // !defined(LEGACY_BACKEND) + struct CnsVal { ssize_t cnsVal; @@ -36,19 +45,19 @@ struct CnsVal #endif }; -UNATIVE_OFFSET emitInsSize(size_t code); +UNATIVE_OFFSET emitInsSize(code_t code); UNATIVE_OFFSET emitInsSizeRM(instruction ins); -UNATIVE_OFFSET emitInsSizeSV(size_t code, int var, int dsp); +UNATIVE_OFFSET emitInsSizeSV(code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, int var, int dsp, int val); UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr); -UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code); -UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code, int val); -UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code); -UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code, int val); +UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code); +UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val); +UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code); +UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code, int val); -BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr); -BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr); -BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr); +BYTE* emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr); +BYTE* emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr); +BYTE* emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc = nullptr); BYTE* emitOutputR(BYTE* dst, instrDesc* id); BYTE* emitOutputRI(BYTE* dst, instrDesc* id); @@ -61,42 +70,60 @@ BYTE* emitOutputRRR(BYTE* dst, instrDesc* id); BYTE* emitOutputLJ(BYTE* dst, instrDesc* id); -unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code); +unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); -unsigned emitGetPrefixSize(size_t code); 
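// [Illustrative sketch; editor addition, not part of the commit being shown.]
// Why the new code_t introduced in emitxarch.h above must be 64 bits wide on both
// x86 and amd64: the 3-byte VEX prefix is accumulated in the upper bytes of the
// opcode value, so its 0xC4 marker sits at bits 48..55 and cannot fit in a 32-bit
// size_t (which only the legacy, non-AVX backend keeps using). The constants and
// the corrected prefix test below mirror the header diff; the names are local
// stand-ins, not the real emitter declarations.

#include <cstdint>

typedef uint64_t code_t_sketch; // stands in for the non-legacy code_t

static_assert(sizeof(code_t_sketch) * 8 >= 56,
              "0xC4000000000000 places the VEX marker at bits 48..55, needing 56 bits");

constexpr code_t_sketch kVexPrefixMask3Byte = 0xFF000000000000ULL;
constexpr code_t_sketch kVexPrefixCode3Byte = 0xC4000000000000ULL;

// The fixed check compares the masked prefix byte against 0xC4 instead of merely
// testing for nonzero bits under the old 0xC4... mask, which could false-positive.
constexpr bool hasVexPrefixSketch(code_t_sketch code)
{
    return (code & kVexPrefixMask3Byte) == kVexPrefixCode3Byte;
}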
-unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code); +unsigned emitGetPrefixSize(code_t code); +unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code); + +unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); +unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); +code_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, code_t code); +unsigned insEncodeRegSIB(instruction ins, regNumber reg, code_t* code); -unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code); -unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code); -size_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code); -unsigned insEncodeRegSIB(instruction ins, regNumber reg, size_t* code); +code_t insEncodeMRreg(instruction ins, code_t code); +code_t insEncodeRMreg(instruction ins, code_t code); +code_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, code_t code); +code_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size); +code_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size); -size_t insEncodeMRreg(instruction ins, size_t code); -size_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code); -size_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size); -size_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size); +unsigned insSSval(unsigned scale); bool IsAVXInstruction(instruction ins); -size_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code); +code_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, code_t code); -size_t AddRexWPrefix(instruction ins, size_t code); -size_t AddRexRPrefix(instruction ins, size_t code); -size_t AddRexXPrefix(instruction ins, size_t code); -size_t AddRexBPrefix(instruction ins, size_t code); -size_t AddRexPrefix(instruction ins, size_t code); +code_t AddRexWPrefix(instruction ins, code_t code); +code_t AddRexRPrefix(instruction ins, code_t code); +code_t AddRexXPrefix(instruction ins, code_t code); +code_t AddRexBPrefix(instruction ins, code_t code); +code_t AddRexPrefix(instruction ins, code_t code); + +bool useSSE3_4Encodings; +bool UseSSE3_4() +{ + return useSSE3_4Encodings; +} +void SetUseSSE3_4(bool value) +{ + useSSE3_4Encodings = value; +} +bool Is4ByteSSE4Instruction(instruction ins); #ifdef FEATURE_AVX_SUPPORT + // 3-byte VEX prefix starts with byte 0xC4 -#define VEX_PREFIX_MASK_3BYTE 0xC4000000000000LL +#define VEX_PREFIX_MASK_3BYTE 0xFF000000000000ULL +#define VEX_PREFIX_CODE_3BYTE 0xC4000000000000ULL + bool TakesVexPrefix(instruction ins); + // Returns true if the instruction encoding already contains VEX prefix -bool hasVexPrefix(size_t code) +bool hasVexPrefix(code_t code) { - return (code & VEX_PREFIX_MASK_3BYTE) != 0; + return (code & VEX_PREFIX_MASK_3BYTE) == VEX_PREFIX_CODE_3BYTE; } -size_t AddVexPrefix(instruction ins, size_t code, emitAttr attr); -size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size) +code_t AddVexPrefix(instruction ins, code_t code, emitAttr attr); +code_t AddVexPrefixIfNeeded(instruction ins, code_t code, emitAttr size) { if (TakesVexPrefix(ins)) { @@ -104,7 +131,7 @@ size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size) } return code; } -size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size) +code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t 
code, emitAttr size) { if (TakesVexPrefix(ins) && !hasVexPrefix(code)) { @@ -112,6 +139,7 @@ size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr } return code; } + bool useAVXEncodings; bool UseAVX() { @@ -121,18 +149,20 @@ void SetUseAVX(bool value) { useAVXEncodings = value; } + bool IsThreeOperandBinaryAVXInstruction(instruction ins); bool IsThreeOperandMoveAVXInstruction(instruction ins); bool IsThreeOperandAVXInstruction(instruction ins) { return (IsThreeOperandBinaryAVXInstruction(ins) || IsThreeOperandMoveAVXInstruction(ins)); } +bool Is4ByteAVXInstruction(instruction ins); #else // !FEATURE_AVX_SUPPORT -bool UseAVX() +bool UseAVX() { return false; } -bool hasVexPrefix(size_t code) +bool hasVexPrefix(code_t code) { return false; } @@ -148,15 +178,19 @@ bool IsThreeOperandAVXInstruction(instruction ins) { return false; } +bool Is4ByteAVXInstruction(instruction ins) +{ + return false; +} bool TakesVexPrefix(instruction ins) { return false; } -size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr attr) +code_t AddVexPrefixIfNeeded(instruction ins, code_t code, emitAttr attr) { return code; } -size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size) +code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr size) { return code; } @@ -226,6 +260,18 @@ bool emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumb bool emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id); +#if FEATURE_FIXED_OUT_ARGS +void emitAdjustStackDepthPushPop(instruction ins) +{ +} +void emitAdjustStackDepth(instruction ins, ssize_t val) +{ +} +#else // !FEATURE_FIXED_OUT_ARGS +void emitAdjustStackDepthPushPop(instruction ins); +void emitAdjustStackDepth(instruction ins, ssize_t val); +#endif // !FEATURE_FIXED_OUT_ARGS + /***************************************************************************** * * Convert between an index scale in bytes to a smaller encoding used for diff --git a/src/jit/error.cpp b/src/jit/error.cpp index 71c3301045..f42dcef5c6 100644 --- a/src/jit/error.cpp +++ b/src/jit/error.cpp @@ -129,7 +129,7 @@ void noWayAssertBodyConditional( } } -#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND) +#if defined(ALT_JIT) && (!defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)) /*****************************************************************************/ void notYetImplemented(const char* msg, const char* filename, unsigned line) @@ -193,7 +193,7 @@ void notYetImplemented(const char* msg, const char* filename, unsigned line) } } -#endif // #if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND) +#endif // #if defined(ALT_JIT) && (!defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)) /*****************************************************************************/ LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam) diff --git a/src/jit/error.h b/src/jit/error.h index c56971aaf7..0535601055 100644 --- a/src/jit/error.h +++ b/src/jit/error.h @@ -58,10 +58,11 @@ extern LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam) /*****************************************************************************/ +// clang-format off + extern void debugError(const char* msg, const char* file, unsigned line); extern void DECLSPEC_NORETURN badCode(); -extern void DECLSPEC_NORETURN -badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line); +extern void DECLSPEC_NORETURN badCode3(const char* msg, const char* msg2, int arg, __in_z const char* 
file, unsigned line); extern void DECLSPEC_NORETURN noWay(); extern void DECLSPEC_NORETURN NOMEM(); extern void DECLSPEC_NORETURN fatal(int errCode); @@ -79,120 +80,6 @@ extern void noWayAssertBodyConditional( ); extern void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line); -#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND) - -// This guy can return based on Config flag/Debugger -extern void notYetImplemented(const char* msg, const char* file, unsigned line); -#define NYI(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__) -#define NYI_IF(cond, msg) \ - if (cond) \ - notYetImplemented("NYI: " #msg, __FILE__, __LINE__) - -#ifdef _TARGET_AMD64_ - -#define NYI_AMD64(msg) notYetImplemented("NYI_AMD64: " #msg, __FILE__, __LINE__) -#define NYI_X86(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM64(msg) \ - do \ - { \ - } while (0) - -#elif defined(_TARGET_X86_) - -#define NYI_AMD64(msg) \ - do \ - { \ - } while (0) -#define NYI_X86(msg) notYetImplemented("NYI_X86: " #msg, __FILE__, __LINE__) -#define NYI_ARM(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM64(msg) \ - do \ - { \ - } while (0) - -#elif defined(_TARGET_ARM_) - -#define NYI_AMD64(msg) \ - do \ - { \ - } while (0) -#define NYI_X86(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM(msg) notYetImplemented("NYI_ARM: " #msg, __FILE__, __LINE__) -#define NYI_ARM64(msg) \ - do \ - { \ - } while (0) - -#elif defined(_TARGET_ARM64_) - -#define NYI_AMD64(msg) \ - do \ - { \ - } while (0) -#define NYI_X86(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM64(msg) notYetImplemented("NYI_ARM64: " #msg, __FILE__, __LINE__) - -#else - -#error "Unknown platform, not x86, ARM, or AMD64?" 
- -#endif - -#else // defined(_TARGET_X86_) && defined(LEGACY_BACKEND) - -#define NYI(msg) assert(!msg) -#define NYI_AMD64(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM(msg) \ - do \ - { \ - } while (0) -#define NYI_ARM64(msg) \ - do \ - { \ - } while (0) - -#endif // _TARGET_X86_ - -#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87) -#define NYI_FLAT_FP_X87(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__) -#define NYI_FLAT_FP_X87_NC(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__) - -#else - -#define NYI_FLAT_FP_X87(msg) \ - do \ - { \ - } while (0) -#define NYI_FLAT_FP_X87_NC(msg) \ - do \ - { \ - } while (0) - -#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87 - #ifdef DEBUG #define NO_WAY(msg) (debugError(msg, __FILE__, __LINE__), noWay()) // Used for fallback stress mode @@ -210,6 +97,8 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); } while (0) #define unreached() noWayAssertBody("unreached", __FILE__, __LINE__) +#define NOWAY_MSG(msg) noWayAssertBodyConditional(msg, __FILE__, __LINE__) + #else #define NO_WAY(msg) noWay() @@ -232,6 +121,8 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); } while (0) #define unreached() noWayAssertBody() +#define NOWAY_MSG(msg) noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS) + #endif // IMPL_LIMITATION is called when we encounter valid IL that is not @@ -239,7 +130,81 @@ extern void notYetImplemented(const char* msg, const char* file, unsigned line); // limitations (that could be removed in the future) #define IMPL_LIMITATION(msg) NO_WAY(msg) -#if defined(_HOST_X86_) +#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND) + +#if defined(ALT_JIT) + +// This guy can return based on Config flag/Debugger +extern void notYetImplemented(const char* msg, const char* file, unsigned line); +#define NYIRAW(msg) notYetImplemented(msg, __FILE__, __LINE__) + +#else // !defined(ALT_JIT) + +#define NYIRAW(msg) NOWAY_MSG(msg) + +#endif // !defined(ALT_JIT) + +#define NYI(msg) NYIRAW("NYI: " msg) +#define NYI_IF(cond, msg) if (cond) NYIRAW("NYI: " msg) + +#ifdef _TARGET_AMD64_ + +#define NYI_AMD64(msg) NYIRAW("NYI_AMD64: " msg) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) + +#elif defined(_TARGET_X86_) + +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) NYIRAW("NYI_X86: " msg) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) + +#elif defined(_TARGET_ARM_) + +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) NYIRAW("NYI_ARM: " msg) +#define NYI_ARM64(msg) do { } while (0) + +#elif defined(_TARGET_ARM64_) + +#define NYI_AMD64(msg) do { } while (0) +#define NYI_X86(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) NYIRAW("NYI_ARM64: " msg) + +#else + +#error "Unknown platform, not x86, ARM, or AMD64?" + +#endif + +#else // NYI not available; make it an assert. 
+ +#define NYI(msg) assert(!msg) +#define NYI_AMD64(msg) do { } while (0) +#define NYI_ARM(msg) do { } while (0) +#define NYI_ARM64(msg) do { } while (0) + +#endif // NYI not available + +#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87) + +#define NYI_FLAT_FP_X87(msg) NYI(msg) +#define NYI_FLAT_FP_X87_NC(msg) NYI(msg) + +#else + +#define NYI_FLAT_FP_X87(msg) do { } while (0) +#define NYI_FLAT_FP_X87_NC(msg) do { } while (0) + +#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87 + +// clang-format on + +#if defined(_HOST_X86_) && !defined(FEATURE_PAL) // While debugging in an Debugger, the "int 3" will cause the program to break // Outside, the exception handler will just filter out the "int 3". diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 1c68bfd96a..441569c339 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -2985,8 +2985,8 @@ void Compiler::fgRemovePreds() { C_ASSERT(offsetof(BasicBlock, bbPreds) == offsetof(BasicBlock, bbCheapPreds)); // bbPreds and bbCheapPreds are at the same place in a union, - C_ASSERT(sizeof(((BasicBlock*)0)->bbPreds) == - sizeof(((BasicBlock*)0)->bbCheapPreds)); // and are the same size. So, this function removes both. + C_ASSERT(sizeof(((BasicBlock*)nullptr)->bbPreds) == + sizeof(((BasicBlock*)nullptr)->bbCheapPreds)); // and are the same size. So, this function removes both. for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) { @@ -3890,8 +3890,7 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block) BBjumpKinds oldJumpKind = top->bbJumpKind; // Update block flags - unsigned originalFlags; - originalFlags = top->bbFlags | BBF_GC_SAFE_POINT; + const unsigned __int64 originalFlags = top->bbFlags | BBF_GC_SAFE_POINT; // Unlike Fei's inliner from puclr, I'm allowed to split loops. // And we keep a few other flags... @@ -4269,6 +4268,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; const bool makeInlineObservations = (compInlineResult != nullptr); const bool isInlining = compIsForInlining(); + unsigned retBlocks = 0; if (makeInlineObservations) { @@ -4638,6 +4638,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* break; case CEE_JMP: + retBlocks++; #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_) if (!isInlining) @@ -4730,6 +4731,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* fgObserveInlineConstants(opcode, pushedStack, isInlining); } break; + case CEE_RET: + retBlocks++; default: break; @@ -4758,6 +4761,27 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* { compInlineResult->Note(InlineObservation::CALLEE_END_OPCODE_SCAN); + if (!compInlineResult->UsesLegacyPolicy()) + { + // If there are no return blocks we know it does not return, however if there + // return blocks we don't know it returns as it may be counting unreachable code. + // However we will still make the CALLEE_DOES_NOT_RETURN observation. + + compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, retBlocks == 0); + + if (retBlocks == 0 && isInlining) + { + // Mark the call node as "no return" as it can impact caller's code quality. + impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; + } + } + + // Determine if call site is within a try. 
+ if (isInlining && impInlineInfo->iciBlock->hasTryIndex()) + { + compInlineResult->Note(InlineObservation::CALLSITE_IN_TRY_REGION); + } + // If the inline is viable and discretionary, do the // profitability screening. if (compInlineResult->IsDiscretionaryCandidate()) @@ -5062,22 +5086,23 @@ void Compiler::fgLinkBasicBlocks() /***************************************************************************** * - * Walk the instrs to create the basic blocks. + * Walk the instrs to create the basic blocks. Returns the number of BBJ_RETURN in method */ -void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget) +unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget) { + unsigned retBlocks; const BYTE* codeBegp = codeAddr; const BYTE* codeEndp = codeAddr + codeSize; bool tailCall = false; unsigned curBBoffs; BasicBlock* curBBdesc; + retBlocks = 0; /* Clear the beginning offset for the first BB */ curBBoffs = 0; -#ifdef DEBUGGING_SUPPORT if (opts.compDbgCode && (info.compVarScopesCount > 0)) { compResetScopeLists(); @@ -5090,7 +5115,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* { /* do nothing */ } } -#endif BBjumpKinds jmpKind; @@ -5280,7 +5304,8 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* // TODO-CQ: We can inline some callees with explicit tail calls if we can guarantee that the calls // can be dispatched as tail calls from the caller. compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX); - return; + retBlocks++; + return retBlocks; } __fallthrough; @@ -5391,6 +5416,7 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* But instead of directly returning to the caller we jump and execute something else in between */ case CEE_RET: + retBlocks++; jmpKind = BBJ_RETURN; break; @@ -5473,8 +5499,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* nxtBBoffs = (IL_OFFSET)(codeAddr - codeBegp); -#ifdef DEBUGGING_SUPPORT - bool foundScope = false; if (opts.compDbgCode && (info.compVarScopesCount > 0)) @@ -5488,7 +5512,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* foundScope = true; } } -#endif /* Do we have a jump? 
*/ @@ -5505,7 +5528,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* bool makeBlock = (jumpTarget[nxtBBoffs] != JT_NONE); -#ifdef DEBUGGING_SUPPORT if (!makeBlock && foundScope) { makeBlock = true; @@ -5516,7 +5538,6 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* } #endif // DEBUG } -#endif // DEBUGGING_SUPPORT if (!makeBlock) { @@ -5581,6 +5602,8 @@ void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* /* Finally link up the bbJumpDest of the blocks together */ fgLinkBasicBlocks(); + + return retBlocks; } /***************************************************************************** @@ -5726,44 +5749,23 @@ void Compiler::fgFindBasicBlocks() /* Now create the basic blocks */ - fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget); + unsigned retBlocks = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget); if (compIsForInlining()) { - if (compInlineResult->IsFailure()) - { - return; - } - - bool hasReturnBlocks = false; - bool hasMoreThanOneReturnBlock = false; - for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - if (block->bbJumpKind == BBJ_RETURN) - { - if (hasReturnBlocks) - { - hasMoreThanOneReturnBlock = true; - break; - } - - hasReturnBlocks = true; - } - } - - if (!hasReturnBlocks && !compInlineResult->UsesLegacyPolicy()) +#ifdef DEBUG + // If fgFindJumpTargets marked the call as "no return" there + // really should be no BBJ_RETURN blocks in the method. + // + // Note LegacyPolicy does not mark calls as no return, so if + // it's active, skip the check. + if (!compInlineResult->UsesLegacyPolicy()) { - // - // Mark the call node as "no return". The inliner might ignore CALLEE_DOES_NOT_RETURN and - // fail inline for a different reasons. In that case we still want to make the "no return" - // information available to the caller as it can impact caller's code quality. - // - - impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; + bool markedNoReturn = (impInlineInfo->iciCall->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0; + assert((markedNoReturn && (retBlocks == 0)) || (!markedNoReturn && (retBlocks >= 1))); } - - compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, !hasReturnBlocks); +#endif // DEBUG if (compInlineResult->IsFailure()) { @@ -5777,12 +5779,14 @@ void Compiler::fgFindBasicBlocks() compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount; info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount; - if (info.compRetNativeType != TYP_VOID && hasMoreThanOneReturnBlock) + // Use a spill temp for the return value if there are multiple return blocks. + if ((info.compRetNativeType != TYP_VOID) && (retBlocks > 1)) { // The lifetime of this var might expand multiple BBs. So it is a long lifetime compiler temp. lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp")); lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType; } + return; } @@ -6666,7 +6670,7 @@ void Compiler::fgImport() impImport(fgFirstBB); - if (!(opts.eeFlags & CORJIT_FLG_SKIP_VERIFICATION)) + if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_SKIP_VERIFICATION)) { CorInfoMethodRuntimeFlags verFlag; verFlag = tiIsVerifiableCode ? 
CORINFO_FLG_VERIFIABLE : CORINFO_FLG_UNVERIFIABLE; @@ -6936,7 +6940,7 @@ GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls) if (opts.IsReadyToRun()) { CORINFO_RESOLVED_TOKEN resolvedToken; - ZeroMemory(&resolvedToken, sizeof(resolvedToken)); + memset(&resolvedToken, 0, sizeof(resolvedToken)); resolvedToken.hClass = cls; return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF); @@ -8248,8 +8252,8 @@ void Compiler::fgAddInternal() if (!varTypeIsFloating(info.compRetType)) { lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this); -#ifdef REG_FLOATRET } +#ifdef REG_FLOATRET else { lvaTable[genReturnLocal].setPrefReg(REG_FLOATRET, this); @@ -8301,7 +8305,7 @@ void Compiler::fgAddInternal() CORINFO_JUST_MY_CODE_HANDLE* pDbgHandle = nullptr; CORINFO_JUST_MY_CODE_HANDLE dbgHandle = nullptr; - if (opts.compDbgCode && !(opts.eeFlags & CORJIT_FLG_IL_STUB)) + if (opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { dbgHandle = info.compCompHnd->getJustMyCodeHandle(info.compMethodHnd, &pDbgHandle); } @@ -8589,17 +8593,12 @@ GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs) // The first good IL offset of a statement in the block, or BAD_IL_OFFSET if such an IL offset // cannot be found. // -// If we are not built with DEBUGGING_SUPPORT or DEBUG, then always report BAD_IL_OFFSET, -// since in that case statements don't contain an IL offset. The effect will be that split -// blocks will lose their IL offset information. - IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block) { // This function searches for IL offsets in statement nodes, so it can't be used in LIR. We // could have a similar function for LIR that searches for GT_IL_OFFSET nodes. assert(!block->IsLIR()); -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext) { assert(stmt->IsStatement()); @@ -8608,7 +8607,6 @@ IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block) return jitGetILoffs(stmt->gtStmt.gtStmtILoffsx); } } -#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG) return BAD_IL_OFFSET; } @@ -8949,10 +8947,10 @@ void Compiler::fgSimpleLowering() for (GenTreePtr tree = stmt->gtStmtList; tree; tree = tree->gtNext) { #else - LIR::Range& range = LIR::AsRange(block); - for (GenTree* tree : range) + LIR::Range& range = LIR::AsRange(block); + for (GenTree* tree : range) + { { - { #endif if (tree->gtOper == GT_ARR_LENGTH) { @@ -9000,7 +8998,7 @@ void Compiler::fgSimpleLowering() add->gtNext = tree; tree->gtPrev = add; #else - range.InsertAfter(arr, con, add); + range.InsertAfter(arr, con, add); #endif } @@ -9339,6 +9337,7 @@ inline bool OperIsControlFlow(genTreeOps oper) switch (oper) { case GT_JTRUE: + case GT_JCC: case GT_SWITCH: case GT_LABEL: @@ -10019,10 +10018,10 @@ void Compiler::fgUnreachableBlock(BasicBlock* block) /***************************************************************************************************** * - * Function called to remove or morph a GT_JTRUE statement when we jump to the same + * Function called to remove or morph a jump when we jump to the same * block when both the condition is true or false. 
*/ -void Compiler::fgRemoveJTrue(BasicBlock* block) +void Compiler::fgRemoveConditionalJump(BasicBlock* block) { noway_assert(block->bbJumpKind == BBJ_COND && block->bbJumpDest == block->bbNext); assert(compRationalIRForm == block->IsLIR()); @@ -10053,7 +10052,7 @@ void Compiler::fgRemoveJTrue(BasicBlock* block) LIR::Range& blockRange = LIR::AsRange(block); GenTree* test = blockRange.LastNode(); - assert(test->OperGet() == GT_JTRUE); + assert(test->OperIsConditionalJump()); bool isClosed; unsigned sideEffects; @@ -10109,7 +10108,7 @@ void Compiler::fgRemoveJTrue(BasicBlock* block) { test->gtStmtExpr = sideEffList; - fgMorphBlockStmt(block, test DEBUGARG("fgRemoveJTrue")); + fgMorphBlockStmt(block, test DEBUGARG("fgRemoveConditionalJump")); } } } @@ -10545,7 +10544,7 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) // Make sure we are replacing "block" with "succBlock" in predBlock->bbJumpDest. noway_assert(predBlock->bbJumpDest == block); predBlock->bbJumpDest = succBlock; - fgRemoveJTrue(predBlock); + fgRemoveConditionalJump(predBlock); break; } @@ -10605,7 +10604,7 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) /* Check for branch to next block */ if (bPrev->bbJumpDest == bPrev->bbNext) { - fgRemoveJTrue(bPrev); + fgRemoveConditionalJump(bPrev); } break; @@ -11031,10 +11030,10 @@ bool Compiler::fgExpandRarelyRunBlocks() noway_assert(tmpbb->isBBCallAlwaysPair()); bPrevPrev = tmpbb; #else - if (tmpbb->bbJumpKind == BBJ_CALLFINALLY) - { - bPrevPrev = tmpbb; - } + if (tmpbb->bbJumpKind == BBJ_CALLFINALLY) + { + bPrevPrev = tmpbb; + } #endif } @@ -11566,60 +11565,60 @@ BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE r #else // FEATURE_EH_FUNCLETS - for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) + for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) + { + if (XTnum == regionIndex) { - if (XTnum == regionIndex) - { - // Don't update our handler's Last info - continue; - } + // Don't update our handler's Last info + continue; + } - if (HBtab->ebdTryLast == bLast) + if (HBtab->ebdTryLast == bLast) + { + // If we moved a set of blocks that were at the end of + // a different try region then we may need to update ebdTryLast + for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext) { - // If we moved a set of blocks that were at the end of - // a different try region then we may need to update ebdTryLast - for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext) + if (block == bPrev) { - if (block == bPrev) - { - fgSetTryEnd(HBtab, bPrev); - break; - } - else if (block == HBtab->ebdTryLast->bbNext) - { - // bPrev does not come after the TryBeg - break; - } + fgSetTryEnd(HBtab, bPrev); + break; + } + else if (block == HBtab->ebdTryLast->bbNext) + { + // bPrev does not come after the TryBeg + break; } } - if (HBtab->ebdHndLast == bLast) + } + if (HBtab->ebdHndLast == bLast) + { + // If we moved a set of blocks that were at the end of + // a different handler region then we must update ebdHndLast + for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext) { - // If we moved a set of blocks that were at the end of - // a different handler region then we must update ebdHndLast - for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext) + if (block == bPrev) { - if (block == bPrev) - { - fgSetHndEnd(HBtab, bPrev); - break; - } - else if (block == HBtab->ebdHndLast->bbNext) - { - // bPrev does not come after the HndBeg - 
break; - } + fgSetHndEnd(HBtab, bPrev); + break; + } + else if (block == HBtab->ebdHndLast->bbNext) + { + // bPrev does not come after the HndBeg + break; } } - } // end exception table iteration + } + } // end exception table iteration - // We have decided to insert the block(s) after fgLastBlock - fgMoveBlocksAfter(bStart, bLast, insertAfterBlk); + // We have decided to insert the block(s) after fgLastBlock + fgMoveBlocksAfter(bStart, bLast, insertAfterBlk); - // If bPrev falls through, we will insert a jump to block - fgConnectFallThrough(bPrev, bStart); + // If bPrev falls through, we will insert a jump to block + fgConnectFallThrough(bPrev, bStart); - // If bLast falls through, we will insert a jump to bNext - fgConnectFallThrough(bLast, bNext); + // If bLast falls through, we will insert a jump to bNext + fgConnectFallThrough(bLast, bNext); #endif // FEATURE_EH_FUNCLETS @@ -12060,70 +12059,70 @@ void Compiler::fgCreateFunclets() #else // !FEATURE_EH_FUNCLETS - /***************************************************************************** - * - * Function called to relocate any and all EH regions. - * Only entire consecutive EH regions will be moved and they will be kept together. - * Except for the first block, the range can not have any blocks that jump into or out of the region. - */ +/***************************************************************************** + * + * Function called to relocate any and all EH regions. + * Only entire consecutive EH regions will be moved and they will be kept together. + * Except for the first block, the range can not have any blocks that jump into or out of the region. + */ - bool Compiler::fgRelocateEHRegions() - { - bool result = false; // Our return value +bool Compiler::fgRelocateEHRegions() +{ + bool result = false; // Our return value #ifdef DEBUG - if (verbose) - printf("*************** In fgRelocateEHRegions()\n"); + if (verbose) + printf("*************** In fgRelocateEHRegions()\n"); #endif - if (fgCanRelocateEHRegions) - { - unsigned XTnum; - EHblkDsc* HBtab; + if (fgCanRelocateEHRegions) + { + unsigned XTnum; + EHblkDsc* HBtab; - for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) + for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++) + { + // Nested EH regions cannot be moved. + // Also we don't want to relocate an EH region that has a filter + if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter()) { - // Nested EH regions cannot be moved. 
- // Also we don't want to relocate an EH region that has a filter - if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter()) - { - bool movedTry = false; + bool movedTry = false; #if DEBUG - bool movedHnd = false; + bool movedHnd = false; #endif // DEBUG - // Only try to move the outermost try region - if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) + // Only try to move the outermost try region + if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) + { + // Move the entire try region if it can be moved + if (HBtab->ebdTryBeg->isRunRarely()) { - // Move the entire try region if it can be moved - if (HBtab->ebdTryBeg->isRunRarely()) + BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY); + if (bTryLastBB != NULL) { - BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY); - if (bTryLastBB != NULL) - { - result = true; - movedTry = true; - } + result = true; + movedTry = true; } + } #if DEBUG - if (verbose && movedTry) - { - printf("\nAfter relocating an EH try region"); - fgDispBasicBlocks(); - fgDispHandlerTab(); + if (verbose && movedTry) + { + printf("\nAfter relocating an EH try region"); + fgDispBasicBlocks(); + fgDispHandlerTab(); - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } + // Make sure that the predecessor lists are accurate + if (expensiveDebugCheckLevel >= 2) + { + fgDebugCheckBBlist(); } -#endif // DEBUG } +#endif // DEBUG + } - // Currently it is not good to move the rarely run handler regions to the end of the method - // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section. - CLANG_FORMAT_COMMENT_ANCHOR; + // Currently it is not good to move the rarely run handler regions to the end of the method + // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section. + CLANG_FORMAT_COMMENT_ANCHOR; #if 0 // Now try to move the entire handler region if it can be moved. @@ -12142,38 +12141,38 @@ void Compiler::fgCreateFunclets() #endif // 0 #if DEBUG - if (verbose && movedHnd) - { - printf("\nAfter relocating an EH handler region"); - fgDispBasicBlocks(); - fgDispHandlerTab(); + if (verbose && movedHnd) + { + printf("\nAfter relocating an EH handler region"); + fgDispBasicBlocks(); + fgDispHandlerTab(); - // Make sure that the predecessor lists are accurate - if (expensiveDebugCheckLevel >= 2) - { - fgDebugCheckBBlist(); - } + // Make sure that the predecessor lists are accurate + if (expensiveDebugCheckLevel >= 2) + { + fgDebugCheckBBlist(); } -#endif // DEBUG } +#endif // DEBUG } } + } #if DEBUG - fgVerifyHandlerTab(); + fgVerifyHandlerTab(); - if (verbose && result) - { - printf("\nAfter fgRelocateEHRegions()"); - fgDispBasicBlocks(); - fgDispHandlerTab(); - // Make sure that the predecessor lists are accurate - fgDebugCheckBBlist(); - } + if (verbose && result) + { + printf("\nAfter fgRelocateEHRegions()"); + fgDispBasicBlocks(); + fgDispHandlerTab(); + // Make sure that the predecessor lists are accurate + fgDebugCheckBBlist(); + } #endif // DEBUG - return result; - } + return result; +} #endif // !FEATURE_EH_FUNCLETS @@ -13489,6 +13488,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) GenTree* switchVal = switchTree->gtOp.gtOp1; noway_assert(genActualTypeIsIntOrI(switchVal->TypeGet())); +#ifndef LEGACY_BACKEND // If we are in LIR, remove the jump table from the block. 
if (block->IsLIR()) { @@ -13496,6 +13496,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) assert(jumpTable->OperGet() == GT_JMPTABLE); blockRange->Remove(jumpTable); } +#endif // Change the GT_SWITCH(switchVal) into GT_JTRUE(GT_EQ(switchVal==0)). // Also mark the node as GTF_DONT_CSE as further down JIT is not capable of handling it. @@ -13793,7 +13794,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi { LIR::Range& blockRange = LIR::AsRange(block); GenTree* jmp = blockRange.LastNode(); - assert(jmp->OperGet() == GT_JTRUE); + assert(jmp->OperIsConditionalJump()); bool isClosed; unsigned sideEffects; @@ -14034,7 +14035,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // we are willing to have more code expansion since we // won't be running code from this page // - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { if (rareJump) { @@ -14169,16 +14170,16 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // gtReverseCond(condTree); + // We need to update the following flags of the bJump block if they were set in the bDest block + bJump->bbFlags |= + (bDest->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF)); + bJump->bbJumpKind = BBJ_COND; bJump->bbJumpDest = bDest->bbNext; /* Mark the jump dest block as being a jump target */ bJump->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL; - // We need to update the following flags of the bJump block if they were set in the bbJumpDest block - bJump->bbFlags |= (bJump->bbJumpDest->bbFlags & - (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF)); - /* Update bbRefs and bbPreds */ // bJump now falls through into the next block @@ -15879,11 +15880,18 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication) /* Reverse the jump condition */ GenTree* test = block->lastNode(); - noway_assert(test->gtOper == GT_JTRUE); + noway_assert(test->OperIsConditionalJump()); - GenTree* cond = gtReverseCond(test->gtOp.gtOp1); - assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node. - test->gtOp.gtOp1 = cond; + if (test->OperGet() == GT_JTRUE) + { + GenTree* cond = gtReverseCond(test->gtOp.gtOp1); + assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node. + test->gtOp.gtOp1 = cond; + } + else + { + gtReverseCond(test); + } // Optimize the Conditional JUMP to go to the new target block->bbJumpDest = bNext->bbJumpDest; @@ -18020,9 +18028,13 @@ void Compiler::fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR) { // If we are sequencing a node that does not appear in LIR, // do not add it to the list. - if (isLIR && (((tree->OperGet() == GT_LIST) && !tree->AsArgList()->IsAggregate()) || tree->OperGet() == GT_ARGPLACE)) + if (isLIR) { - return; + if ((tree->OperGet() == GT_LIST) || (tree->OperGet() == GT_ARGPLACE) || + (tree->OperGet() == GT_FIELD_LIST && !tree->AsFieldList()->IsFieldListHead())) + { + return; + } } /* Append to the node list */ @@ -18359,7 +18371,7 @@ void Compiler::fgSetBlockOrder(BasicBlock* block) // // For the (usual) case of GT_BLK or GT_OBJ, the size is always "evaluated" (i.e. // instantiated into a register) last. In those cases, the GTF_REVERSE_OPS flag -// on the assignment works as usual. +// on the assignment works as usual. // In order to preserve previous possible orderings, the order for evaluating // the size of a GT_DYN_BLK node is controlled by its gtEvalSizeFirst flag. 
If // that is set, the size is evaluated first, and then the src and dst are evaluated @@ -18549,20 +18561,20 @@ static escapeMapping_t s_EscapeMapping[] = {'"', """}, {0, nullptr} }; -// clang-formt on +// clang-format on -const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map) +const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map) { - const char* nameOut = nameIn; - unsigned lengthOut; - unsigned index; - bool match; - bool subsitutionRequired; - const char* pChar; - - lengthOut = 1; + const char* nameOut = nameIn; + unsigned lengthOut; + unsigned index; + bool match; + bool subsitutionRequired; + const char* pChar; + + lengthOut = 1; subsitutionRequired = false; - pChar = nameIn; + pChar = nameIn; while (*pChar != '\0') { match = false; @@ -18590,8 +18602,8 @@ const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* ma if (subsitutionRequired) { - char* newName = (char*) compGetMemA(lengthOut, CMK_DebugOnly); - char* pDest; + char* newName = (char*)compGetMemA(lengthOut, CMK_DebugOnly); + char* pDest; pDest = newName; pChar = nameIn; while (*pChar != '\0') @@ -18619,7 +18631,7 @@ const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* ma pChar++; } *pDest++ = '\0'; - nameOut = (const char*) newName; + nameOut = (const char*)newName; } return nameOut; @@ -18655,44 +18667,47 @@ static void fprintfDouble(FILE* fgxFile, double value) // Opens a file to which a flowgraph can be dumped, whose name is based on the current // config vales. -FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type) +FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type) { - FILE* fgxFile; - LPCWSTR pattern = nullptr; - LPCWSTR filename = nullptr; - LPCWSTR pathname = nullptr; - const char* escapedString; - bool createDuplicateFgxFiles = true; + FILE* fgxFile; + LPCWSTR pattern = nullptr; + LPCWSTR filename = nullptr; + LPCWSTR pathname = nullptr; + const char* escapedString; + bool createDuplicateFgxFiles = true; #ifdef DEBUG - if (opts.eeFlags & CORJIT_FLG_PREJIT) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { - pattern = JitConfig.NgenDumpFg(); + pattern = JitConfig.NgenDumpFg(); filename = JitConfig.NgenDumpFgFile(); pathname = JitConfig.NgenDumpFgDir(); } else { - pattern = JitConfig.JitDumpFg(); + pattern = JitConfig.JitDumpFg(); filename = JitConfig.JitDumpFgFile(); pathname = JitConfig.JitDumpFgDir(); } #endif // DEBUG - if (fgBBcount <= 1) { + if (fgBBcount <= 1) + { return nullptr; -} + } - if (pattern == nullptr) { + if (pattern == nullptr) + { return nullptr; -} + } - if (wcslen(pattern) == 0) { + if (wcslen(pattern) == 0) + { return nullptr; -} + } LPCWSTR phasePattern = JitConfig.JitDumpFgPhase(); - LPCWSTR phaseName = PhaseShortNames[phase]; + LPCWSTR phaseName = PhaseShortNames[phase]; if (phasePattern == nullptr) { if (phase != PHASE_DETERMINE_FIRST_COLD_BLOCK) @@ -18723,9 +18738,10 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas { while ((*pattern != W(':')) && (*pattern != W('*'))) { - if (*pattern != *className) { + if (*pattern != *className) + { return nullptr; -} + } pattern++; className++; @@ -18736,12 +18752,14 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas } else { - if (*className != 0) { + if (*className != 0) + { return nullptr; -} - } + } } - if (*pattern != W(':')) { + } + if (*pattern != W(':')) + { return nullptr; } @@ -18757,9 +18775,10 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* 
wbDontClose, Phases phas { while ((*pattern != 0) && (*pattern != W('*'))) { - if (*pattern != *methodName) { + if (*pattern != *methodName) + { return nullptr; -} + } pattern++; methodName++; @@ -18770,12 +18789,14 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas } else { - if (*methodName != 0) { + if (*methodName != 0) + { return nullptr; -} - } + } } - if (*pattern != 0) { + } + if (*pattern != 0) + { return nullptr; } } @@ -18838,15 +18859,15 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phas { createDuplicateFgxFiles = true; -ONE_FILE_PER_METHOD:; + ONE_FILE_PER_METHOD:; - escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping); + escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping); size_t wCharCount = strlen(escapedString) + wcslen(phaseName) + 1 + strlen("~999") + wcslen(type) + 1; if (pathname != nullptr) { wCharCount += wcslen(pathname) + 1; } - filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR)); + filename = (LPCWSTR)alloca(wCharCount * sizeof(WCHAR)); if (pathname != nullptr) { swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S-%s.%s"), pathname, escapedString, phaseName, type); @@ -18855,7 +18876,7 @@ ONE_FILE_PER_METHOD:; { swprintf_s((LPWSTR)filename, wCharCount, W("%S.%s"), escapedString, type); } - fgxFile = _wfopen(filename, W("r")); // Check if this file already exists + fgxFile = _wfopen(filename, W("r")); // Check if this file already exists if (fgxFile != nullptr) { // For Generic methods we will have both hot and cold versions @@ -18876,10 +18897,11 @@ ONE_FILE_PER_METHOD:; { swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.%s"), escapedString, i, type); } - fgxFile = _wfopen(filename, W("r")); // Check if this file exists - if (fgxFile == nullptr) { + fgxFile = _wfopen(filename, W("r")); // Check if this file exists + if (fgxFile == nullptr) + { break; - } + } } // If we have already created 1000 files with this name then just fail if (fgxFile != nullptr) @@ -18888,28 +18910,28 @@ ONE_FILE_PER_METHOD:; return nullptr; } } - fgxFile = _wfopen(filename, W("a+")); + fgxFile = _wfopen(filename, W("a+")); *wbDontClose = false; } else if (wcscmp(filename, W("stdout")) == 0) { - fgxFile = jitstdout; + fgxFile = jitstdout; *wbDontClose = true; } else if (wcscmp(filename, W("stderr")) == 0) { - fgxFile = stderr; + fgxFile = stderr; *wbDontClose = true; } else { LPCWSTR origFilename = filename; - size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2; + size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2; if (pathname != nullptr) { wCharCount += wcslen(pathname) + 1; } - filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR)); + filename = (LPCWSTR)alloca(wCharCount * sizeof(WCHAR)); if (pathname != nullptr) { swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.%s"), pathname, origFilename, type); @@ -18918,7 +18940,7 @@ ONE_FILE_PER_METHOD:; { swprintf_s((LPWSTR)filename, wCharCount, W("%s.%s"), origFilename, type); } - fgxFile = _wfopen(filename, W("a+")); + fgxFile = _wfopen(filename, W("a+")); *wbDontClose = false; } @@ -18959,39 +18981,39 @@ ONE_FILE_PER_METHOD:; // phases. // COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.) 
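// Editorial usage note (not part of this change): the knobs read above via JitConfig map to the
// usual COMPlus_ environment variables; the document's own comment names COMPlus_JitDumpFgDot.
// A hedged example of enabling a per-method GraphViz dump, with the pattern grammar taken from
// the character-by-character Class:Method matching in fgOpenFlowGraphFile above ('*' acts as a
// wildcard there):
//
//   COMPlus_JitDumpFg=MyClass:Main    ; dump only methods whose class:method matches this pattern
//   COMPlus_JitDumpFgDir=c:\fgdumps   ; directory the per-method dump files are written to
//   COMPlus_JitDumpFgDot=1            ; emit GraphViz .dot files instead of the default .fgx XML
//
// The values shown are illustrative only; the exact accepted grammar is whatever the matching
// loops above implement.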
-bool Compiler::fgDumpFlowGraph(Phases phase) +bool Compiler::fgDumpFlowGraph(Phases phase) { - bool result = false; - bool dontClose = false; - bool createDotFile = false; + bool result = false; + bool dontClose = false; + bool createDotFile = false; if (JitConfig.JitDumpFgDot()) { createDotFile = true; } - - FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx")); + + FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx")); if (fgxFile == nullptr) { return false; } - bool validWeights = fgHaveValidEdgeWeights; - unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT; - double weightDivisor = (double) (calledCount * BB_UNITY_WEIGHT); - const char* escapedString; - const char* regionString = "NONE"; + bool validWeights = fgHaveValidEdgeWeights; + unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT; + double weightDivisor = (double)(calledCount * BB_UNITY_WEIGHT); + const char* escapedString; + const char* regionString = "NONE"; - if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT) + if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT) { - regionString="HOT"; + regionString = "HOT"; } else if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD) { - regionString="COLD"; + regionString = "COLD"; } else if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT) { - regionString="JIT"; + regionString = "JIT"; } if (createDotFile) @@ -19001,7 +19023,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase) } else { - fprintf(fgxFile, "<method"); + fprintf(fgxFile, "<method"); escapedString = fgProcessEscapes(info.compFullName, s_EscapeMapping); fprintf(fgxFile, "\n name=\"%s\"", escapedString); @@ -19042,77 +19064,74 @@ bool Compiler::fgDumpFlowGraph(Phases phase) fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum); } - fprintf(fgxFile, ">"); + fprintf(fgxFile, ">"); fprintf(fgxFile, "\n <blocks"); fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount); - fprintf(fgxFile, ">"); + fprintf(fgxFile, ">"); } - static const char* kindImage[] = { "EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", - "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE", - "CALLFINALLY", "COND", "SWITCH" }; + static const char* kindImage[] = {"EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", "THROW", "RETURN", "NONE", + "ALWAYS", "LEAVE", "CALLFINALLY", "COND", "SWITCH"}; BasicBlock* block; unsigned blockOrdinal; - for (block = fgFirstBB , blockOrdinal = 1; - block != nullptr; - block = block->bbNext, blockOrdinal++) + for (block = fgFirstBB, blockOrdinal = 1; block != nullptr; block = block->bbNext, blockOrdinal++) { if (createDotFile) { // Add constraint edges to try to keep nodes ordered. // It seems to work best if these edges are all created first. - switch(block->bbJumpKind) + switch (block->bbJumpKind) { - case BBJ_COND: - case BBJ_NONE: - assert(block->bbNext != nullptr); - fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum); - break; - default: - // These may or may not have an edge to the next block. - // Add a transparent edge to keep nodes ordered. - if (block->bbNext != nullptr) - { - fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, block->bbNext->bbNum); - } + case BBJ_COND: + case BBJ_NONE: + assert(block->bbNext != nullptr); + fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum); + break; + default: + // These may or may not have an edge to the next block. 
+ // Add a transparent edge to keep nodes ordered. + if (block->bbNext != nullptr) + { + fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, + block->bbNext->bbNum); + } } } else { - fprintf(fgxFile,"\n <block"); - fprintf(fgxFile,"\n id=\"%d\"", block->bbNum); - fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal); - fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]); + fprintf(fgxFile, "\n <block"); + fprintf(fgxFile, "\n id=\"%d\"", block->bbNum); + fprintf(fgxFile, "\n ordinal=\"%d\"", blockOrdinal); + fprintf(fgxFile, "\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]); if (block->hasTryIndex()) { - fprintf(fgxFile,"\n inTry=\"%s\"", "true"); + fprintf(fgxFile, "\n inTry=\"%s\"", "true"); } if (block->hasHndIndex()) { - fprintf(fgxFile,"\n inHandler=\"%s\"", "true"); + fprintf(fgxFile, "\n inHandler=\"%s\"", "true"); } - if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && - ((block->bbFlags & BBF_COLD) == 0) ) + if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && ((block->bbFlags & BBF_COLD) == 0)) { - fprintf(fgxFile,"\n hot=\"true\""); + fprintf(fgxFile, "\n hot=\"true\""); } if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY)) { - fprintf(fgxFile,"\n callsNew=\"true\""); + fprintf(fgxFile, "\n callsNew=\"true\""); } if (block->bbFlags & BBF_LOOP_HEAD) { - fprintf(fgxFile,"\n loopHead=\"true\""); + fprintf(fgxFile, "\n loopHead=\"true\""); } - fprintf(fgxFile,"\n weight="); - fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor); - fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block)); - fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs); - fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd); - fprintf(fgxFile, ">"); - fprintf(fgxFile,"\n </block>"); + fprintf(fgxFile, "\n weight="); + fprintfDouble(fgxFile, ((double)block->bbWeight) / weightDivisor); + fprintf(fgxFile, "\n codeEstimate=\"%d\"", fgGetCodeEstimate(block)); + fprintf(fgxFile, "\n startOffset=\"%d\"", block->bbCodeOffs); + fprintf(fgxFile, "\n endOffset=\"%d\"", block->bbCodeOffsEnd); + fprintf(fgxFile, ">"); + fprintf(fgxFile, "\n </block>"); } } @@ -19122,10 +19141,10 @@ bool Compiler::fgDumpFlowGraph(Phases phase) fprintf(fgxFile, "\n <edges"); fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount); - fprintf(fgxFile, ">"); + fprintf(fgxFile, ">"); } - unsigned edgeNum = 1; + unsigned edgeNum = 1; BasicBlock* bTarget; for (bTarget = fgFirstBB; bTarget != nullptr; bTarget = bTarget->bbNext) { @@ -19136,21 +19155,21 @@ bool Compiler::fgDumpFlowGraph(Phases phase) } else { - targetWeightDivisor = (double) bTarget->bbWeight; + targetWeightDivisor = (double)bTarget->bbWeight; } flowList* edge; for (edge = bTarget->bbPreds; edge != nullptr; edge = edge->flNext, edgeNum++) { - BasicBlock* bSource = edge->flBlock; - double sourceWeightDivisor; + BasicBlock* bSource = edge->flBlock; + double sourceWeightDivisor; if (bSource->bbWeight == BB_ZERO_WEIGHT) { sourceWeightDivisor = 1.0; } else { - sourceWeightDivisor = (double) bSource->bbWeight; + sourceWeightDivisor = (double)bSource->bbWeight; } if (createDotFile) { @@ -19172,54 +19191,54 @@ bool Compiler::fgDumpFlowGraph(Phases phase) } else { - fprintf(fgxFile,"\n <edge"); - fprintf(fgxFile,"\n id=\"%d\"", edgeNum); - fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum); - fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum); + fprintf(fgxFile, "\n <edge"); + fprintf(fgxFile, "\n id=\"%d\"", edgeNum); + fprintf(fgxFile, "\n source=\"%d\"", bSource->bbNum); + fprintf(fgxFile, 
"\n target=\"%d\"", bTarget->bbNum); if (bSource->bbJumpKind == BBJ_SWITCH) { if (edge->flDupCount >= 2) { - fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount); + fprintf(fgxFile, "\n switchCases=\"%d\"", edge->flDupCount); } if (bSource->bbJumpSwt->getDefault() == bTarget) { - fprintf(fgxFile,"\n switchDefault=\"true\""); + fprintf(fgxFile, "\n switchDefault=\"true\""); } } if (validWeights) { unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2; - fprintf(fgxFile,"\n weight="); - fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor); + fprintf(fgxFile, "\n weight="); + fprintfDouble(fgxFile, ((double)edgeWeight) / weightDivisor); if (edge->flEdgeWeightMin != edge->flEdgeWeightMax) { - fprintf(fgxFile,"\n minWeight="); - fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor); - fprintf(fgxFile,"\n maxWeight="); - fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor); + fprintf(fgxFile, "\n minWeight="); + fprintfDouble(fgxFile, ((double)edge->flEdgeWeightMin) / weightDivisor); + fprintf(fgxFile, "\n maxWeight="); + fprintfDouble(fgxFile, ((double)edge->flEdgeWeightMax) / weightDivisor); } if (edgeWeight > 0) { if (edgeWeight < bSource->bbWeight) { - fprintf(fgxFile,"\n out="); - fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor ); + fprintf(fgxFile, "\n out="); + fprintfDouble(fgxFile, ((double)edgeWeight) / sourceWeightDivisor); } if (edgeWeight < bTarget->bbWeight) { - fprintf(fgxFile,"\n in="); - fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor); + fprintf(fgxFile, "\n in="); + fprintfDouble(fgxFile, ((double)edgeWeight) / targetWeightDivisor); } } } } if (!createDotFile) { - fprintf(fgxFile, ">"); - fprintf(fgxFile,"\n </edge>"); + fprintf(fgxFile, ">"); + fprintf(fgxFile, "\n </edge>"); } } } @@ -19251,7 +19270,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase) /*****************************************************************************/ #ifdef DEBUG -void Compiler::fgDispReach() +void Compiler::fgDispReach() { printf("------------------------------------------------\n"); printf("BBnum Reachable by \n"); @@ -19269,7 +19288,7 @@ void Compiler::fgDispReach() } } -void Compiler::fgDispDoms() +void Compiler::fgDispDoms() { // Don't bother printing this when we have a large number of BasicBlocks in the method if (fgBBcount > 256) @@ -19296,23 +19315,17 @@ void Compiler::fgDispDoms() /*****************************************************************************/ -void Compiler::fgTableDispBasicBlock(BasicBlock* block, - int ibcColWidth /* = 0 */) +void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 */) { - unsigned flags = block->bbFlags; + const unsigned __int64 flags = block->bbFlags; + unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax; + int maxBlockNumWidth = CountDigits(bbNumMax); + maxBlockNumWidth = max(maxBlockNumWidth, 2); + int blockNumWidth = CountDigits(block->bbNum); + blockNumWidth = max(blockNumWidth, 2); + int blockNumPadding = maxBlockNumWidth - blockNumWidth; - unsigned bbNumMax = compIsForInlining() ? 
impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax; - int maxBlockNumWidth = CountDigits(bbNumMax); - maxBlockNumWidth = max(maxBlockNumWidth, 2); - int blockNumWidth = CountDigits(block->bbNum); - blockNumWidth = max(blockNumWidth, 2); - int blockNumPadding = maxBlockNumWidth - blockNumWidth; - - printf("BB%02u%*s [%08p] %2u", - block->bbNum, - blockNumPadding, "", - dspPtr(block), - block->bbRefs); + printf("BB%02u%*s [%08p] %2u", block->bbNum, blockNumPadding, "", dspPtr(block), block->bbRefs); // // Display EH 'try' region index @@ -19406,86 +19419,89 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, // Display block branch target // - if (flags & BBF_REMOVED) + if (flags & BBF_REMOVED) { - printf( "[removed] "); + printf("[removed] "); } else { switch (block->bbJumpKind) { - case BBJ_COND: - printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - break; + case BBJ_COND: + printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + break; - case BBJ_CALLFINALLY: - printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - break; + case BBJ_CALLFINALLY: + printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + break; - case BBJ_ALWAYS: - if (flags & BBF_KEEP_BBJ_ALWAYS) - { - printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - } - else - { - printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - } - break; + case BBJ_ALWAYS: + if (flags & BBF_KEEP_BBJ_ALWAYS) + { + printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + } + else + { + printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + } + break; - case BBJ_LEAVE: - printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - break; + case BBJ_LEAVE: + printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + break; - case BBJ_EHFINALLYRET: - printf( "%*s (finret)", maxBlockNumWidth - 2, ""); - break; + case BBJ_EHFINALLYRET: + printf("%*s (finret)", maxBlockNumWidth - 2, ""); + break; - case BBJ_EHFILTERRET: - printf( "%*s (fltret)", maxBlockNumWidth - 2, ""); - break; + case BBJ_EHFILTERRET: + printf("%*s (fltret)", maxBlockNumWidth - 2, ""); + break; - case BBJ_EHCATCHRET: - printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); - break; + case BBJ_EHCATCHRET: + printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum, + maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), ""); + break; - case BBJ_THROW: - printf( "%*s (throw )", maxBlockNumWidth - 2, ""); - break; + case BBJ_THROW: + printf("%*s (throw )", maxBlockNumWidth - 2, ""); + break; - case BBJ_RETURN: - printf( "%*s (return)", maxBlockNumWidth - 2, ""); - break; + case BBJ_RETURN: + printf("%*s (return)", maxBlockNumWidth - 2, ""); + break; - default: - printf( "%*s ", maxBlockNumWidth - 2, ""); - break; + default: + printf("%*s ", 
maxBlockNumWidth - 2, ""); + break; - case BBJ_SWITCH: - printf("->"); - - unsigned jumpCnt; - jumpCnt = block->bbJumpSwt->bbsCount; - BasicBlock** jumpTab; - jumpTab = block->bbJumpSwt->bbsDstTab; - int switchWidth; - switchWidth = 0; - do - { - printf("%cBB%02u", - (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',', - (*jumpTab)->bbNum); - switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2); - } - while (++jumpTab, --jumpCnt); + case BBJ_SWITCH: + printf("->"); - if (switchWidth < 7) - { - printf("%*s", 8 - switchWidth, ""); - } + unsigned jumpCnt; + jumpCnt = block->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = block->bbJumpSwt->bbsDstTab; + int switchWidth; + switchWidth = 0; + do + { + printf("%cBB%02u", (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum); + switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2); + } while (++jumpTab, --jumpCnt); - printf(" (switch)"); - break; + if (switchWidth < 7) + { + printf("%*s", 8 - switchWidth, ""); + } + + printf(" (switch)"); + break; } } @@ -19526,12 +19542,28 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, switch (block->bbCatchTyp) { - case BBCT_NONE: break; - case BBCT_FAULT: printf("fault "); cnt += 6; break; - case BBCT_FINALLY: printf("finally "); cnt += 8; break; - case BBCT_FILTER: printf("filter "); cnt += 7; break; - case BBCT_FILTER_HANDLER: printf("filtHnd "); cnt += 8; break; - default: printf("catch "); cnt += 6; break; + case BBCT_NONE: + break; + case BBCT_FAULT: + printf("fault "); + cnt += 6; + break; + case BBCT_FINALLY: + printf("finally "); + cnt += 8; + break; + case BBCT_FILTER: + printf("filter "); + cnt += 7; + break; + case BBCT_FILTER_HANDLER: + printf("filtHnd "); + cnt += 8; + break; + default: + printf("catch "); + cnt += 6; + break; } if (block->bbCatchTyp != BBCT_NONE) @@ -19548,9 +19580,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, EHblkDsc* HBtab; EHblkDsc* HBtabEnd; - for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; - HBtab < HBtabEnd; - HBtab++) + for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++) { if (HBtab->ebdTryBeg == block) { @@ -19564,9 +19594,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, EHblkDsc* HBtab; EHblkDsc* HBtabEnd; - for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; - HBtab < HBtabEnd; - HBtab++) + for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++) { if (HBtab->ebdTryLast == block) { @@ -19607,9 +19635,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, Dump blocks from firstBlock to lastBlock. */ -void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, - BasicBlock* lastBlock, - bool dumpTrees) +void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, bool dumpTrees) { BasicBlock* block; @@ -19627,24 +19653,27 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, if (block->bbFlags & BBF_PROF_WEIGHT) { int thisIbcWidth = CountDigits(block->bbWeight); - ibcColWidth = max(ibcColWidth, thisIbcWidth); + ibcColWidth = max(ibcColWidth, thisIbcWidth); } - if (block == lastBlock) { + if (block == lastBlock) + { break; - } + } } if (ibcColWidth > 0) { ibcColWidth = max(ibcColWidth, 3) + 1; // + 1 for the leading space } - unsigned bbNumMax = compIsForInlining() ? 
impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax; - int maxBlockNumWidth = CountDigits(bbNumMax); - maxBlockNumWidth = max(maxBlockNumWidth, 2); + unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax; + int maxBlockNumWidth = CountDigits(bbNumMax); + maxBlockNumWidth = max(maxBlockNumWidth, 2); padWidth += maxBlockNumWidth - 2; // Account for functions with a large number of blocks. + // clang-format off + printf("\n"); printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n", padWidth, "------------", @@ -19665,9 +19694,9 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, ibcColWidth, "------------", maxBlockNumWidth, "----"); - for (block = firstBlock; - block; - block = block->bbNext) + // clang-format on + + for (block = firstBlock; block; block = block->bbNext) { // First, do some checking on the bbPrev links if (block->bbPrev) @@ -19681,36 +19710,34 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, { printf("bad prev link!\n"); } - + if (block == fgFirstColdBlock) { - printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", - padWidth, "~~~~~~~~~~~~", - ibcColWidth, "~~~~~~~~~~~~", - maxBlockNumWidth, "~~~~"); + printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~" + "~~~~~~~~~~~~~~~\n", + padWidth, "~~~~~~~~~~~~", ibcColWidth, "~~~~~~~~~~~~", maxBlockNumWidth, "~~~~"); } #if FEATURE_EH_FUNCLETS if (block == fgFirstFuncletBB) { - printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++++++++++++++++++ funclets follow\n", - padWidth, "++++++++++++", - ibcColWidth, "++++++++++++", - maxBlockNumWidth, "++++"); + printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s+++++++++++++++++++++++++" + "+++++++++++++++ funclets follow\n", + padWidth, "++++++++++++", ibcColWidth, "++++++++++++", maxBlockNumWidth, "++++"); } #endif // FEATURE_EH_FUNCLETS fgTableDispBasicBlock(block, ibcColWidth); - if (block == lastBlock) { + if (block == lastBlock) + { break; - } + } } - printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n", - padWidth, "------------", - ibcColWidth, "------------", - maxBlockNumWidth, "----"); + printf("------%*s------------------------------------%*s-----------------------%*s---------------------------------" + "-------\n", + padWidth, "------------", ibcColWidth, "------------", maxBlockNumWidth, "----"); if (dumpTrees) { @@ -19720,7 +19747,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, /*****************************************************************************/ -void Compiler::fgDispBasicBlocks(bool dumpTrees) +void Compiler::fgDispBasicBlocks(bool dumpTrees) { fgDispBasicBlocks(fgFirstBB, nullptr, dumpTrees); } @@ -19728,9 +19755,9 @@ void Compiler::fgDispBasicBlocks(bool dumpTrees) /*****************************************************************************/ // Increment the stmtNum and dump the tree using gtDispTree // -void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum) +void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum) { - compCurStmtNum++; // Increment the current stmtNum + compCurStmtNum++; // Increment the current stmtNum printf("\n***** BB%02u, stmt %d\n", blkNum, compCurStmtNum); @@ -19750,7 +19777,7 @@ void 
Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum) // Arguments: // block - The block to dump. // -void Compiler::fgDumpBlock(BasicBlock* block) +void Compiler::fgDumpBlock(BasicBlock* block) { printf("\n------------ "); block->dspBlockHeader(this); @@ -19762,7 +19789,7 @@ void Compiler::fgDumpBlock(BasicBlock* block) fgDumpStmtTree(stmt, block->bbNum); if (stmt == block->bbTreeList) { - block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum + block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum } } } @@ -19775,63 +19802,81 @@ void Compiler::fgDumpBlock(BasicBlock* block) /*****************************************************************************/ // Walk the BasicBlock list calling fgDumpTree once per Stmt // -void Compiler::fgDumpTrees(BasicBlock* firstBlock, - BasicBlock* lastBlock) +void Compiler::fgDumpTrees(BasicBlock* firstBlock, BasicBlock* lastBlock) { - compCurStmtNum = 0; // Reset the current stmtNum + compCurStmtNum = 0; // Reset the current stmtNum /* Walk the basic blocks */ - // Note that typically we have already called fgDispBasicBlocks() + // Note that typically we have already called fgDispBasicBlocks() // so we don't need to print the preds and succs again here // for (BasicBlock* block = firstBlock; block; block = block->bbNext) { fgDumpBlock(block); - if (block == lastBlock) { + if (block == lastBlock) + { break; + } } - } - printf("\n-------------------------------------------------------------------------------------------------------------------\n"); + printf("\n---------------------------------------------------------------------------------------------------------" + "----------\n"); } - /***************************************************************************** * Try to create as many candidates for GTF_MUL_64RSLT as possible. * We convert 'intOp1*intOp2' into 'int(long(nop(intOp1))*long(intOp2))'. */ /* static */ -Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data) +Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data) { - GenTreePtr tree = *pTree; + GenTreePtr tree = *pTree; Compiler* pComp = data->compiler; - - if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) { + + if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) + { return WALK_CONTINUE; -} + } + +#ifdef DEBUG + if (pComp->verbose) + { + printf("STRESS_64RSLT_MUL before:\n"); + pComp->gtDispTree(tree); + } +#endif // DEBUG // To ensure optNarrowTree() doesn't fold back to the original tree. 
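// Editorial sketch (not part of the change): at the source level, the rewrite performed by the
// statements below corresponds to
//
//     int32_t r = a * b;                                  // original GT_MUL, TYP_INT
//     int32_t r = (int32_t)((int64_t)a * (int64_t)b);     // after the stress transform
//
// The low 32 bits of the widened product equal the 32-bit product, so the result is unchanged;
// the GT_NOP wrapped around op1 is what keeps optNarrowTree() from folding the casts away and
// recreating the original tree.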
- tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1); tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG); - tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG); - tree->gtType = TYP_LONG; - *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT); + tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1); + tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG); + tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG); + tree->gtType = TYP_LONG; + *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT); + +#ifdef DEBUG + if (pComp->verbose) + { + printf("STRESS_64RSLT_MUL after:\n"); + pComp->gtDispTree(*pTree); + } +#endif // DEBUG return WALK_SKIP_SUBTREES; } -void Compiler::fgStress64RsltMul() +void Compiler::fgStress64RsltMul() { - if (!compStressCompile(STRESS_64RSLT_MUL, 20)) { + if (!compStressCompile(STRESS_64RSLT_MUL, 20)) + { return; -} + } fgWalkAllTreesPre(fgStress64RsltMulCB, (void*)this); } - // This variable is used to generate "traversal labels": one-time constants with which // we label basic blocks that are members of the basic block list, in order to have a // fast, high-probability test for membership in that list. Type is "volatile" because @@ -19847,8 +19892,7 @@ static volatile int bbTraverseLabel = 1; * *****************************************************************************/ -void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, - bool checkBBRefs /* = true */) +void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRefs /* = true */) { #ifdef DEBUG if (verbose) @@ -19858,7 +19902,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, #endif // DEBUG fgDebugCheckBlockLinks(); - + if (fgBBcount > 10000 && expensiveDebugCheckLevel < 1) { // The basic block checks are too expensive if there are too many blocks, @@ -19875,7 +19919,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, unsigned blockRefs; #if FEATURE_EH_FUNCLETS - bool reachedFirstFunclet = false; + bool reachedFirstFunclet = false; if (fgFuncletsCreated) { // @@ -19898,15 +19942,13 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, block->bbTraversalStamp = curTraversalStamp; } - for (prevBlock = nullptr, block = fgFirstBB; - block; - prevBlock = block, block = block->bbNext) + for (prevBlock = nullptr, block = fgFirstBB; block; prevBlock = block, block = block->bbNext) { blockRefs = 0; /* First basic block has countOfInEdges() >= 1 */ - if (block == fgFirstBB) + if (block == fgFirstBB) { noway_assert(block->countOfInEdges() >= 1); blockRefs = 1; @@ -19920,27 +19962,24 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, // If the block is a BBJ_COND, a BBJ_SWITCH or a // lowered GT_SWITCH_TABLE node then make sure it - // ends with a GT_JTRUE or a GT_SWITCH + // ends with a conditional jump or a GT_SWITCH if (block->bbJumpKind == BBJ_COND) { - noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->gtOper == GT_JTRUE); + noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->OperIsConditionalJump()); } else if (block->bbJumpKind == BBJ_SWITCH) { #ifndef LEGACY_BACKEND noway_assert(block->lastNode()->gtNext == nullptr && - (block->lastNode()->gtOper == GT_SWITCH || - block->lastNode()->gtOper == GT_SWITCH_TABLE)); -#else // LEGACY_BACKEND - noway_assert(block->lastStmt()->gtNext == NULL && - block->lastStmt()->gtStmtExpr->gtOper == 
GT_SWITCH); + (block->lastNode()->gtOper == GT_SWITCH || block->lastNode()->gtOper == GT_SWITCH_TABLE)); +#else // LEGACY_BACKEND + noway_assert(block->lastStmt()->gtNext == NULL && block->lastStmt()->gtStmtExpr->gtOper == GT_SWITCH); #endif // LEGACY_BACKEND } - else if (!( block->bbJumpKind == BBJ_ALWAYS - || block->bbJumpKind == BBJ_RETURN)) + else if (!(block->bbJumpKind == BBJ_ALWAYS || block->bbJumpKind == BBJ_RETURN)) { - //this block cannot have a poll + // this block cannot have a poll noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL)); } @@ -19981,7 +20020,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, #endif // FEATURE_EH_FUNCLETS // Don't check cheap preds. - for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr; blockRefs += pred->flDupCount, pred = pred->flNext) + for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr; + blockRefs += pred->flDupCount, pred = pred->flNext) { assert(fgComputePredsDone); // If this isn't set, why do we have a preds list? @@ -19994,95 +20034,101 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, if (ehTryDsc != nullptr) { // You can jump to the start of a try - if (ehTryDsc->ebdTryBeg == block) { + if (ehTryDsc->ebdTryBeg == block) + { goto CHECK_HND; -} + } // You can jump within the same try region - if (bbInTryRegions(block->getTryIndex(), blockPred)) { + if (bbInTryRegions(block->getTryIndex(), blockPred)) + { goto CHECK_HND; -} + } // The catch block can jump back into the middle of the try - if (bbInCatchHandlerRegions(block, blockPred)) { + if (bbInCatchHandlerRegions(block, blockPred)) + { goto CHECK_HND; -} + } // The end of a finally region is a BBJ_EHFINALLYRET block (during importing, BBJ_LEAVE) which // is marked as "returning" to the BBJ_ALWAYS block following the BBJ_CALLFINALLY // block that does a local call to the finally. This BBJ_ALWAYS is within // the try region protected by the finally (for x86, ARM), but that's ok. 
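// Editorial sketch of the block layout the comment above describes (block numbers are made up):
//
//   BB01 [try]            ...body of the try...
//   BB02 BBJ_CALLFINALLY -> BB10   ; locally invokes the finally handler
//   BB03 BBJ_ALWAYS      -> BB04   ; paired continuation block, still inside the try region
//   BB04                  ...code after the try/finally...
//   ...
//   BB10 [finally]        ...handler body...
//   BB11 BBJ_EHFINALLYRET          ; "returns", i.e. is a predecessor of BB03
//
// So a predecessor edge from the BBJ_EHFINALLYRET block (BB11) into the middle of the try
// region (BB03) is expected, and that is exactly the case the following check allows.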
- if (prevBlock->bbJumpKind == BBJ_CALLFINALLY && - block->bbJumpKind == BBJ_ALWAYS && - blockPred->bbJumpKind == BBJ_EHFINALLYRET) { + if (prevBlock->bbJumpKind == BBJ_CALLFINALLY && block->bbJumpKind == BBJ_ALWAYS && + blockPred->bbJumpKind == BBJ_EHFINALLYRET) + { goto CHECK_HND; -} + } - printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum); + printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum, + block->bbNum); noway_assert(!"Jump into middle of try region"); } -CHECK_HND:; + CHECK_HND:; EHblkDsc* ehHndDsc = ehGetBlockHndDsc(block); if (ehHndDsc != nullptr) { // You can do a BBJ_EHFINALLYRET or BBJ_EHFILTERRET into a handler region - if ( (blockPred->bbJumpKind == BBJ_EHFINALLYRET) - || (blockPred->bbJumpKind == BBJ_EHFILTERRET)) { + if ((blockPred->bbJumpKind == BBJ_EHFINALLYRET) || (blockPred->bbJumpKind == BBJ_EHFILTERRET)) + { goto CHECK_JUMP; -} + } // Our try block can call our finally block - if ((block->bbCatchTyp == BBCT_FINALLY) && - (blockPred->bbJumpKind == BBJ_CALLFINALLY) && + if ((block->bbCatchTyp == BBCT_FINALLY) && (blockPred->bbJumpKind == BBJ_CALLFINALLY) && ehCallFinallyInCorrectRegion(blockPred, block->getHndIndex())) { goto CHECK_JUMP; } // You can jump within the same handler region - if (bbInHandlerRegions(block->getHndIndex(), blockPred)) { + if (bbInHandlerRegions(block->getHndIndex(), blockPred)) + { goto CHECK_JUMP; -} + } // A filter can jump to the start of the filter handler - if (ehHndDsc->HasFilter()) { + if (ehHndDsc->HasFilter()) + { goto CHECK_JUMP; -} + } - printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum); + printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum, + block->bbNum); noway_assert(!"Jump into the middle of handler region"); } -CHECK_JUMP:; + CHECK_JUMP:; switch (blockPred->bbJumpKind) { - case BBJ_COND: - noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block); - break; + case BBJ_COND: + noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block); + break; - case BBJ_NONE: - noway_assert(blockPred->bbNext == block); - break; + case BBJ_NONE: + noway_assert(blockPred->bbNext == block); + break; - case BBJ_CALLFINALLY: - case BBJ_ALWAYS: - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - noway_assert(blockPred->bbJumpDest == block); - break; + case BBJ_CALLFINALLY: + case BBJ_ALWAYS: + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: + noway_assert(blockPred->bbJumpDest == block); + break; - case BBJ_EHFINALLYRET: + case BBJ_EHFINALLYRET: { // If the current block is a successor to a BBJ_EHFINALLYRET (return from finally), // then the lexically previous block should be a call to the same finally. // Verify all of that. - unsigned hndIndex = blockPred->getHndIndex(); - EHblkDsc* ehDsc = ehGetDsc(hndIndex); - BasicBlock* finBeg = ehDsc->ebdHndBeg; + unsigned hndIndex = blockPred->getHndIndex(); + EHblkDsc* ehDsc = ehGetDsc(hndIndex); + BasicBlock* finBeg = ehDsc->ebdHndBeg; // Because there is no bbPrev, we have to search for the lexically previous // block. 
We can shorten the search by only looking in places where it is legal @@ -20094,13 +20140,15 @@ CHECK_JUMP:; for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext) { - if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) { + if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) + { continue; -} + } - if (block == bcall->bbNext) { + if (block == bcall->bbNext) + { goto PRED_OK; - } + } } #if FEATURE_EH_FUNCLETS @@ -20114,19 +20162,22 @@ CHECK_JUMP:; for (BasicBlock* bcall = fgFirstFuncletBB; bcall; bcall = bcall->bbNext) { - if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) { + if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) + { continue; -} + } - if (block != bcall->bbNext) { + if (block != bcall->bbNext) + { continue; -} + } - if (ehCallFinallyInCorrectRegion(bcall, hndIndex)) { + if (ehCallFinallyInCorrectRegion(bcall, hndIndex)) + { goto PRED_OK; + } } } - } #endif // FEATURE_EH_FUNCLETS @@ -20134,34 +20185,34 @@ CHECK_JUMP:; } break; - case BBJ_THROW: - case BBJ_RETURN: - noway_assert(!"THROW and RETURN block cannot be in the predecessor list!"); - break; + case BBJ_THROW: + case BBJ_RETURN: + noway_assert(!"THROW and RETURN block cannot be in the predecessor list!"); + break; - case BBJ_SWITCH: - unsigned jumpCnt; jumpCnt = blockPred->bbJumpSwt->bbsCount; - BasicBlock** jumpTab; jumpTab = blockPred->bbJumpSwt->bbsDstTab; + case BBJ_SWITCH: + unsigned jumpCnt; + jumpCnt = blockPred->bbJumpSwt->bbsCount; + BasicBlock** jumpTab; + jumpTab = blockPred->bbJumpSwt->bbsDstTab; - do - { - if (block == *jumpTab) + do { - goto PRED_OK; - } - } - while (++jumpTab, --jumpCnt); + if (block == *jumpTab) + { + goto PRED_OK; + } + } while (++jumpTab, --jumpCnt); - noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!"); - break; + noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!"); + break; - default: - noway_assert(!"Unexpected bbJumpKind"); - break; + default: + noway_assert(!"Unexpected bbJumpKind"); + break; } -PRED_OK:; - + PRED_OK:; } /* Check the bbRefs */ @@ -20200,7 +20251,7 @@ PRED_OK:; copiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0); #else // JIT32_GCENCODER copiedForGenericsCtxt = FALSE; -#endif // JIT32_GCENCODER +#endif // JIT32_GCENCODER // This if only in support of the noway_asserts it contains. if (info.compIsStatic) @@ -20213,16 +20264,18 @@ PRED_OK:; // For instance method: assert(info.compThisArg != BAD_VAR_NUM); bool compThisArgAddrExposedOK = !lvaTable[info.compThisArg].lvAddrExposed; + #ifndef JIT32_GCENCODER compThisArgAddrExposedOK = compThisArgAddrExposedOK || copiedForGenericsCtxt; -#endif // !JIT32_GCENCODER - noway_assert(compThisArgAddrExposedOK && // should never expose the address of arg 0 or - !lvaTable[info.compThisArg].lvArgWrite && // write to arg 0. - ( // In addition, - lvaArg0Var == info.compThisArg || // lvArg0Var should remain 0 if arg0 is not written to or address-exposed. - lvaArg0Var != info.compThisArg && - (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt) - )); +#endif // !JIT32_GCENCODER + + // Should never expose the address of arg 0 or write to arg 0. + // In addition, lvArg0Var should remain 0 if arg0 is not + // written to or address-exposed. 
+ noway_assert(compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvArgWrite && + (lvaArg0Var == info.compThisArg || + lvaArg0Var != info.compThisArg && (lvaTable[lvaArg0Var].lvAddrExposed || + lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt))); } } @@ -20232,40 +20285,40 @@ PRED_OK:; * ****************************************************************************/ -void Compiler::fgDebugCheckFlags(GenTreePtr tree) +void Compiler::fgDebugCheckFlags(GenTreePtr tree) { noway_assert(tree->gtOper != GT_STMT); - genTreeOps oper = tree->OperGet(); - unsigned kind = tree->OperKind(); - unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT; - unsigned chkFlags = 0; + genTreeOps oper = tree->OperGet(); + unsigned kind = tree->OperKind(); + unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT; + unsigned chkFlags = 0; /* Is this a leaf node? */ - if (kind & GTK_LEAF) + if (kind & GTK_LEAF) { switch (oper) { - case GT_CLS_VAR: - chkFlags |= GTF_GLOB_REF; - break; + case GT_CLS_VAR: + chkFlags |= GTF_GLOB_REF; + break; - case GT_CATCH_ARG: - chkFlags |= GTF_ORDER_SIDEEFF; - break; + case GT_CATCH_ARG: + chkFlags |= GTF_ORDER_SIDEEFF; + break; - default: - break; + default: + break; } } /* Is it a 'simple' unary/binary operator? */ - else if (kind & GTK_SMPOP) + else if (kind & GTK_SMPOP) { - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtGetOp2(); + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); // During GS work, we make shadow copies for params. // In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param. @@ -20275,48 +20328,88 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) // TYP_INT up to the GT_ASG tree is only correct if we don't need to propagate the TYP_INT back up. // The following checks will ensure this. - // Is the left child of "tree" a GT_ASG?, + // Is the left child of "tree" a GT_ASG? + // + // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine. + // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will + // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine. if (op1 && op1->gtOper == GT_ASG) { - assert(tree->gtType == TYP_VOID || // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine. - tree->gtOper == GT_COMMA); // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will - } // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine. + assert(tree->gtType == TYP_VOID || tree->gtOper == GT_COMMA); + } - // Is the right child of "tree" a GT_ASG?, + // Is the right child of "tree" a GT_ASG? + // + // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine. if (op2 && op2->gtOper == GT_ASG) { - assert(tree->gtType == TYP_VOID); // If parent is a TYP_VOID, we don't no need to propagate TYP_INT up. We are fine. 
+ assert(tree->gtType == TYP_VOID); } switch (oper) { - case GT_QMARK: - if (op1->OperIsCompare()) - { - noway_assert(op1->gtFlags & GTF_DONT_CSE); - } - else - { - noway_assert( (op1->gtOper == GT_CNS_INT) && - ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1)) ); - } - break; + case GT_QMARK: + if (op1->OperIsCompare()) + { + noway_assert(op1->gtFlags & GTF_DONT_CSE); + } + else + { + noway_assert((op1->gtOper == GT_CNS_INT) && + ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1))); + } + break; - default: - break; + case GT_LIST: + case GT_FIELD_LIST: + if ((op2 != nullptr) && op2->OperIsAnyList()) + { + ArrayStack<GenTree*> stack(this); + while ((tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsAnyList()) + { + stack.Push(tree); + tree = tree->gtGetOp2(); + } + + fgDebugCheckFlags(tree); + + while (stack.Height() > 0) + { + tree = stack.Pop(); + assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); + fgDebugCheckFlags(tree->gtOp.gtOp1); + chkFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT); + chkFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT); + fgDebugCheckFlagsHelper(tree, (tree->gtFlags & GTF_ALL_EFFECT), chkFlags); + } + + return; + } + break; + + default: + break; } /* Recursively check the subtrees */ - if (op1) { fgDebugCheckFlags(op1); -} - if (op2) { fgDebugCheckFlags(op2); -} + if (op1) + { + fgDebugCheckFlags(op1); + } + if (op2) + { + fgDebugCheckFlags(op2); + } - if (op1) { chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT); -} - if (op2) { chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT); -} + if (op1) + { + chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT); + } + if (op2) + { + chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT); + } // We reuse the value of GTF_REVERSE_OPS for a GT_IND-specific flag, // so exempt that (unary) operator. @@ -20331,7 +20424,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) was set and thus GTF_ASG cannot be considered here. */ /* For a GT_ASG(GT_IND(x), y) we are interested in the side effects of x */ - GenTreePtr op1p; + GenTreePtr op1p; if ((kind & GTK_ASGOP) && (op1->gtOper == GT_IND)) { op1p = op1->gtOp.gtOp1; @@ -20355,20 +20448,18 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) if (kind & GTK_ASGOP) { - chkFlags |= GTF_ASG; + chkFlags |= GTF_ASG; } /* Note that it is OK for treeFlags not to have a GTF_EXCEPT, AssertionProp's non-Null may have cleared it */ if (tree->OperMayThrow()) { - chkFlags |= (treeFlags & GTF_EXCEPT); + chkFlags |= (treeFlags & GTF_EXCEPT); } - if (oper == GT_ADDR && - (op1->OperIsLocal() || - op1->gtOper == GT_CLS_VAR || - (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR))) + if (oper == GT_ADDR && (op1->OperIsLocal() || op1->gtOper == GT_CLS_VAR || + (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR))) { /* &aliasedVar doesn't need GTF_GLOB_REF, though alisasedVar does. Similarly for clsVar */ @@ -20378,131 +20469,149 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) /* See what kind of a special operator we have here */ - else { switch (tree->OperGet()) + else { - case GT_CALL: + switch (tree->OperGet()) + { + case GT_CALL: - GenTreePtr args; - GenTreePtr argx; - GenTreeCall* call; - - call = tree->AsCall(); + GenTreePtr args; + GenTreePtr argx; + GenTreeCall* call; - chkFlags |= GTF_CALL; + call = tree->AsCall(); - if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT)) - { - switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd)) - { - // Is this a helper call that can throw an exception ? 
- case CORINFO_HELP_LDIV: - case CORINFO_HELP_LMOD: - case CORINFO_HELP_METHOD_ACCESS_CHECK: - case CORINFO_HELP_FIELD_ACCESS_CHECK: - case CORINFO_HELP_CLASS_ACCESS_CHECK: - case CORINFO_HELP_DELEGATE_SECURITY_CHECK: - chkFlags |= GTF_EXCEPT; - break; - default: - break; - } - } + chkFlags |= GTF_CALL; - if (call->gtCallObjp) - { - fgDebugCheckFlags(call->gtCallObjp); - chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT); + if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT)) + { + switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd)) + { + // Is this a helper call that can throw an exception ? + case CORINFO_HELP_LDIV: + case CORINFO_HELP_LMOD: + case CORINFO_HELP_METHOD_ACCESS_CHECK: + case CORINFO_HELP_FIELD_ACCESS_CHECK: + case CORINFO_HELP_CLASS_ACCESS_CHECK: + case CORINFO_HELP_DELEGATE_SECURITY_CHECK: + chkFlags |= GTF_EXCEPT; + break; + default: + break; + } + } - if (call->gtCallObjp->gtFlags & GTF_ASG) - { - treeFlags |= GTF_ASG; - } - } + if (call->gtCallObjp) + { + fgDebugCheckFlags(call->gtCallObjp); + chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT); - for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) - { - argx = args->gtOp.gtOp1; - fgDebugCheckFlags(argx); + if (call->gtCallObjp->gtFlags & GTF_ASG) + { + treeFlags |= GTF_ASG; + } + } - chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT); + for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2) + { + argx = args->gtOp.gtOp1; + fgDebugCheckFlags(argx); - if (argx->gtFlags & GTF_ASG) - { - treeFlags |= GTF_ASG; - } - } + chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT); - for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2) - { - argx = args->gtOp.gtOp1; - fgDebugCheckFlags(argx); + if (argx->gtFlags & GTF_ASG) + { + treeFlags |= GTF_ASG; + } + } - chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT); + for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2) + { + argx = args->gtOp.gtOp1; + fgDebugCheckFlags(argx); - if (argx->gtFlags & GTF_ASG) - { - treeFlags |= GTF_ASG; - } - } + chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT); - if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr)) - { - fgDebugCheckFlags(call->gtCallCookie); - chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT); - } + if (argx->gtFlags & GTF_ASG) + { + treeFlags |= GTF_ASG; + } + } - if (call->gtCallType == CT_INDIRECT) - { - fgDebugCheckFlags(call->gtCallAddr); - chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT); - } + if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr)) + { + fgDebugCheckFlags(call->gtCallCookie); + chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT); + } - if (call->IsUnmanaged() && - (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)) - { - if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP) - { - noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || - call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF); - } - else - { - noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || - call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF); - } - } - break; + if (call->gtCallType == CT_INDIRECT) + { + fgDebugCheckFlags(call->gtCallAddr); + chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT); + } - case GT_ARR_ELEM: + if (call->IsUnmanaged() && (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)) + { + if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP) + { + noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || + call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == 
TYP_BYREF); + } + else + { + noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL || + call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF); + } + } + break; - GenTreePtr arrObj; - unsigned dim; + case GT_ARR_ELEM: - arrObj = tree->gtArrElem.gtArrObj; - fgDebugCheckFlags(arrObj); - chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT); + GenTreePtr arrObj; + unsigned dim; - for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) - { - fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]); - chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; - } - break; + arrObj = tree->gtArrElem.gtArrObj; + fgDebugCheckFlags(arrObj); + chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT); - case GT_ARR_OFFSET: - fgDebugCheckFlags(tree->gtArrOffs.gtOffset); - chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT); - fgDebugCheckFlags(tree->gtArrOffs.gtIndex); - chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT); - fgDebugCheckFlags(tree->gtArrOffs.gtArrObj); - chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT); - break; + for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) + { + fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]); + chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT; + } + break; - default: - break; + case GT_ARR_OFFSET: + fgDebugCheckFlags(tree->gtArrOffs.gtOffset); + chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT); + fgDebugCheckFlags(tree->gtArrOffs.gtIndex); + chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT); + fgDebugCheckFlags(tree->gtArrOffs.gtArrObj); + chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT); + break; + + default: + break; + } } + + fgDebugCheckFlagsHelper(tree, treeFlags, chkFlags); } +//------------------------------------------------------------------------------ +// fgDebugCheckFlagsHelper : Check if all bits that are set in chkFlags are also set in treeFlags. +// +// +// Arguments: +// tree - Tree whose flags are being checked +// treeFlags - Actual flags on the tree +// chkFlags - Expected flags +// +// Note: +// Checking that all bits that are set in treeFlags are also set in chkFlags is currently disabled. + +void Compiler::fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags) +{ if (chkFlags & ~treeFlags) { // Print the tree so we can see it in the log. @@ -20524,12 +20633,12 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) #if 0 // TODO-Cleanup: /* The tree has extra flags set. However, this will happen if we - replace a subtree with something, but don't clear the flags up - the tree. Can't flag this unless we start clearing flags above. + replace a subtree with something, but don't clear the flags up + the tree. Can't flag this unless we start clearing flags above. - Note: we need this working for GTF_CALL and CSEs, so I'm enabling - it for calls. - */ + Note: we need this working for GTF_CALL and CSEs, so I'm enabling + it for calls. + */ if (tree->OperGet() != GT_CALL && (treeFlags & GTF_CALL) && !(chkFlags & GTF_CALL)) { // Print the tree so we can see it in the log. @@ -20545,7 +20654,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree) GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE); printf("\n"); gtDispTree(tree); - } + } #endif // 0 } } @@ -20569,14 +20678,13 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node) noway_assert(stmt->gtStmtList); // The first node's gtPrev must be nullptr (the gtPrev list is not circular). 
- // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below terminates. + // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below + // terminates. assert(stmt->gtStmtList->gtPrev == nullptr); - for (GenTreePtr tree = stmt->gtStmtList; - tree != nullptr; - tree = tree->gtNext) + for (GenTreePtr tree = stmt->gtStmtList; tree != nullptr; tree = tree->gtNext) { - if (tree->gtPrev) + if (tree->gtPrev) { noway_assert(tree->gtPrev->gtNext == tree); } @@ -20585,7 +20693,7 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node) noway_assert(tree == stmt->gtStmtList); } - if (tree->gtNext) + if (tree->gtNext) { noway_assert(tree->gtNext->gtPrev == tree); } @@ -20621,40 +20729,40 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node) { switch (tree->gtOper) { - case GT_QMARK: - expectedPrevTree = tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second). - break; + case GT_QMARK: + expectedPrevTree = + tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second). + break; - case GT_COLON: - expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first). - break; + case GT_COLON: + expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first). + break; - default: - if (tree->gtOp.gtOp2) - { - if (tree->gtFlags & GTF_REVERSE_OPS) + default: + if (tree->gtOp.gtOp2) { - expectedPrevTree = tree->gtOp.gtOp1; + if (tree->gtFlags & GTF_REVERSE_OPS) + { + expectedPrevTree = tree->gtOp.gtOp1; + } + else + { + expectedPrevTree = tree->gtOp.gtOp2; + } } else { - expectedPrevTree = tree->gtOp.gtOp2; + expectedPrevTree = tree->gtOp.gtOp1; } - } - else - { - expectedPrevTree = tree->gtOp.gtOp1; - } - break; + break; } } - noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node - tree->gtPrev == expectedPrevTree); // The "normal" case + noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node + tree->gtPrev == expectedPrevTree); // The "normal" case } } - /***************************************************************************** * * A DEBUG routine to check the correctness of the links between GT_STMT nodes @@ -20662,15 +20770,14 @@ void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node) * ****************************************************************************/ -void Compiler::fgDebugCheckLinks(bool morphTrees) +void Compiler::fgDebugCheckLinks(bool morphTrees) { // This used to be only on for stress, and there was a comment stating that // it was "quite an expensive operation" but I did not find that to be true. // Set DO_SANITY_DEBUG_CHECKS to false to revert to that behavior. 
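The node-link checker here verifies that the statement's linear threading is mutually consistent: the head has no predecessor, every gtPrev/gtNext pair points back at its partner, and the forward chain terminates. A small sketch of the same invariant over a stand-in doubly linked list, not the real GenTree threading:

#include <cassert>
#include <cstdio>

// Stand-in for a linearly threaded node list.
struct LinkedNode
{
    LinkedNode* prev;
    LinkedNode* next;
};

// Checks: the head has no predecessor, every link is mutual, and the forward
// chain terminates (the loop ends only when next becomes null).
void checkThreading(LinkedNode* head)
{
    assert(head != nullptr && head->prev == nullptr);
    for (LinkedNode* n = head; n != nullptr; n = n->next)
    {
        if (n->prev != nullptr)
        {
            assert(n->prev->next == n);
        }
        if (n->next != nullptr)
        {
            assert(n->next->prev == n);
        }
    }
}

int main()
{
    LinkedNode a = {nullptr, nullptr}, b = {nullptr, nullptr}, c = {nullptr, nullptr};
    a.next = &b;
    b.prev = &a;
    b.next = &c;
    c.prev = &b;
    checkThreading(&a);
    printf("node threading is consistent\n");
    return 0;
}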
const bool DO_SANITY_DEBUG_CHECKS = true; - if (!DO_SANITY_DEBUG_CHECKS && - !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30)) + if (!DO_SANITY_DEBUG_CHECKS && !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30)) { return; } @@ -20680,7 +20787,7 @@ void Compiler::fgDebugCheckLinks(bool morphTrees) /* For each basic block check the bbTreeList links */ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { -PROCESS_BLOCK_AGAIN:; + PROCESS_BLOCK_AGAIN:; if (block->IsLIR()) { LIR::AsRange(block).CheckLIR(this); @@ -20690,11 +20797,12 @@ PROCESS_BLOCK_AGAIN:; for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { /* Verify that bbTreeList is threaded correctly */ - /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the last GT_STMT in a block is nullptr. */ + /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the + * last GT_STMT in a block is nullptr. */ noway_assert(stmt->gtPrev); - if (stmt == block->bbTreeList) + if (stmt == block->bbTreeList) { noway_assert(stmt->gtPrev->gtNext == nullptr); } @@ -20703,7 +20811,7 @@ PROCESS_BLOCK_AGAIN:; noway_assert(stmt->gtPrev->gtNext == stmt); } - if (stmt->gtNext) + if (stmt->gtNext) { noway_assert(stmt->gtNext->gtPrev == stmt); } @@ -20782,9 +20890,9 @@ void Compiler::fgDebugCheckBlockLinks() // Create a set with all the successors. Don't use BlockSet, so we don't need to worry // about the BlockSet epoch. BitVecTraits bitVecTraits(fgBBNumMax + 1, this); - BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits)); + BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits)); BasicBlock** jumpTable = block->bbJumpSwt->bbsDstTab; - unsigned jumpCount = block->bbJumpSwt->bbsCount; + unsigned jumpCount = block->bbJumpSwt->bbsCount; for (unsigned i = 0; i < jumpCount; i++) { BitVecOps::AddElemD(&bitVecTraits, succBlocks, jumpTable[i]->bbNum); @@ -20822,10 +20930,10 @@ void Compiler::fgDebugCheckBlockLinks() // Likewise the depth limit is a policy consideration, and serves mostly // as a safeguard to prevent runaway inlining of small methods. -unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo) +unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo) { BYTE* candidateCode = inlineInfo->inlineCandidateInfo->methInfo.ILCode; - InlineContext* inlineContext = inlineInfo->iciStmt->gtStmt.gtInlineContext; + InlineContext* inlineContext = inlineInfo->iciStmt->gtInlineContext; InlineResult* inlineResult = inlineInfo->inlineResult; // There should be a context for all candidates. 
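fgCheckInlineDepthAndRecursion (continued below) walks the chain of inline contexts at the call site, both to bound the nesting depth and to reject a candidate whose IL body already appears somewhere up the chain. A hedged sketch of that walk, with a stand-in Context type in place of the real InlineContext:

#include <cstdio>

// Stand-in for the inline context chain.
struct Context
{
    const unsigned char* ilCode; // IL of the method inlined at this level
    Context*             parent; // enclosing context, nullptr at the root
};

// Returns the inline depth for the candidate, or -1 if the same IL body is
// already on the chain (which would make the inline recursive).
int checkDepthAndRecursion(const unsigned char* candidateIL, Context* site)
{
    int depth = 0;
    for (Context* c = site; c != nullptr; c = c->parent)
    {
        depth++;
        if (c->ilCode == candidateIL)
        {
            return -1;
        }
    }
    return depth;
}

int main()
{
    unsigned char rootIL[1], calleeIL[1], newIL[1];
    Context root   = {rootIL, nullptr};
    Context callee = {calleeIL, &root};

    printf("%d\n", checkDepthAndRecursion(rootIL, &callee)); // -1: would recurse into the root
    printf("%d\n", checkDepthAndRecursion(newIL, &callee));  //  2: acceptable at depth two
    return 0;
}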
@@ -20860,17 +20968,18 @@ unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo) * Inlining phase */ - -void Compiler::fgInline() +void Compiler::fgInline() { - if (!opts.OptEnabled(CLFLG_INLINING)) { + if (!opts.OptEnabled(CLFLG_INLINING)) + { return; -} + } #ifdef DEBUG - if (verbose) { + if (verbose) + { printf("*************** In fgInline()\n"); -} + } #endif // DEBUG BasicBlock* block = fgFirstBB; @@ -20881,9 +20990,7 @@ void Compiler::fgInline() for (; block != nullptr; block = block->bbNext) { - for (GenTreeStmt* stmt = block->firstStmt(); - stmt; - stmt = stmt->gtNextStmt) + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { stmt->gtInlineContext = rootContext; } @@ -20901,9 +21008,7 @@ void Compiler::fgInline() GenTreeStmt* stmt; GenTreePtr expr; - for (stmt = block->firstStmt(); - stmt != nullptr; - stmt = stmt->gtNextStmt) + for (stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt) { expr = stmt->gtStmtExpr; @@ -20932,14 +21037,11 @@ void Compiler::fgInline() } // See if we need to replace the return value place holder. - fgWalkTreePre(&stmt->gtStmtExpr, - fgUpdateInlineReturnExpressionPlaceHolder, - (void *) this); + fgWalkTreePre(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, (void*)this); // See if stmt is of the form GT_COMMA(call, nop) - // If yes, we can get rid of GT_COMMA. - if (expr->OperGet() == GT_COMMA && - expr->gtOp.gtOp1->OperGet() == GT_CALL && + // If yes, we can get rid of GT_COMMA. + if (expr->OperGet() == GT_COMMA && expr->gtOp.gtOp1->OperGet() == GT_CALL && expr->gtOp.gtOp2->OperGet() == GT_NOP) { stmt->gtStmtExpr = expr->gtOp.gtOp1; @@ -20961,9 +21063,7 @@ void Compiler::fgInline() { GenTreeStmt* stmt; - for (stmt = block->firstStmt(); - stmt; - stmt = stmt->gtNextStmt) + for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { // Call Compiler::fgDebugCheckInlineCandidates on each node fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckInlineCandidates); @@ -20975,17 +21075,17 @@ void Compiler::fgInline() fgVerifyHandlerTab(); - if (verbose) + if (verbose) { printf("*************** After fgInline()\n"); fgDispBasicBlocks(true); fgDispHandlerTab(); } - if (verbose || fgPrintInlinedMethods) + if (verbose || fgPrintInlinedMethods) { - printf("**************** Inline Tree\n"); - m_inlineStrategy->Dump(); + printf("**************** Inline Tree\n"); + m_inlineStrategy->Dump(); } #endif // DEBUG @@ -21007,14 +21107,13 @@ void Compiler::fgInline() // Note: // Invokes fgNoteNonInlineCandidate on the nodes it finds. -Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; if (tree->gtOper == GT_CALL) { Compiler* compiler = data->compiler; - GenTreePtr stmt = (GenTreePtr) data->pCallbackData; + GenTreeStmt* stmt = (GenTreeStmt*)data->pCallbackData; GenTreeCall* call = tree->AsCall(); compiler->fgNoteNonInlineCandidate(stmt, call); @@ -21027,17 +21126,16 @@ Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree // not marked as inline candidates. // // Arguments: -// tree - statement containing the call +// stmt - statement containing the call // call - the call itself // // Notes: // Used in debug only to try and place descriptions of inline failures // into the proper context in the inline tree. 
-void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree, - GenTreeCall* call) +void Compiler::fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call) { - InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate"); + InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate"); InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE; // Try and recover the reason left behind when the jit decided @@ -21070,7 +21168,7 @@ void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree, if (call->gtCallType == CT_USER_FUNC) { // Create InlineContext for the failure - m_inlineStrategy->NewFailure(tree, &inlineResult); + m_inlineStrategy->NewFailure(stmt, &inlineResult); } } @@ -21088,12 +21186,8 @@ void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree, */ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree) { - noway_assert((tree->gtOper == GT_LCL_VAR) || - (tree->gtOper == GT_FIELD) || - (tree->gtOper == GT_IND) || - (tree->gtOper == GT_BLK) || - (tree->gtOper == GT_OBJ) || - tree->OperIsSIMD() || + noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) || + (tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() || // tree->gtOper == GT_CALL || cannot get address of call. // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode. // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder @@ -21101,18 +21195,18 @@ GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree) switch (tree->OperGet()) { - case GT_BLK: - case GT_OBJ: - case GT_IND: - return tree->gtOp.gtOp1; + case GT_BLK: + case GT_OBJ: + case GT_IND: + return tree->gtOp.gtOp1; - case GT_COMMA: - tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2); - tree->gtType = TYP_BYREF; - return tree; + case GT_COMMA: + tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2); + tree->gtType = TYP_BYREF; + return tree; - default: - return gtNewOperNode(GT_ADDR, TYP_BYREF, tree); + default: + return gtNewOperNode(GT_ADDR, TYP_BYREF, tree); } } @@ -21137,15 +21231,15 @@ GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_ // we have a ", , , call()" -- this is very defensive as we may never get // an inlinee that is made of commas. If the inlinee is not a call, then // we use a copy block to do the assignment. - GenTreePtr src = child; - GenTreePtr lastComma = NULL; + GenTreePtr src = child; + GenTreePtr lastComma = nullptr; while (src->gtOper == GT_COMMA) { lastComma = src; - src = src->gtOp.gtOp2; + src = src->gtOp.gtOp2; } - GenTreePtr newInlinee = NULL; + GenTreePtr newInlinee = nullptr; if (src->gtOper == GT_CALL) { // If inlinee was just a call, new inlinee is v05 = call() @@ -21162,16 +21256,16 @@ GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_ if (child->gtOper == GT_COMMA) { lastComma->gtOp.gtOp2 = newInlinee; - newInlinee = child; + newInlinee = child; } } else { // Inlinee is not a call, so just create a copy block to the tmp. 
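fgGetStructAsStructPtr, shown above, turns a struct-valued tree into one that yields its address: an indirection already carries the address as its operand, a comma forwards the rewrite to its value-producing operand, and anything else gets an explicit address-of node. A simplified sketch of that shape with stand-in node types; type adjustments such as retyping the comma to TYP_BYREF are omitted.

#include <cstdio>

// Stand-in tree for the "struct value to struct pointer" rewrite;
// ownership and type bookkeeping are not modeled.
enum Oper
{
    OP_IND,
    OP_COMMA,
    OP_LCL_VAR,
    OP_ADDR
};

struct Tree
{
    Oper  oper;
    Tree* op1;
    Tree* op2;
};

Tree* newAddr(Tree* t)
{
    return new Tree{OP_ADDR, t, nullptr}; // demo allocation, never freed
}

// An indirection already holds the address; a comma forwards the rewrite to
// its result operand; anything else gets an address-of node.
Tree* getStructAsStructPtr(Tree* t)
{
    switch (t->oper)
    {
        case OP_IND:
            return t->op1;
        case OP_COMMA:
            t->op2 = getStructAsStructPtr(t->op2);
            return t;
        default:
            return newAddr(t);
    }
}

int main()
{
    Tree  lcl = {OP_LCL_VAR, nullptr, nullptr};
    Tree* p   = getStructAsStructPtr(&lcl);
    printf("rewritten oper: %d (OP_ADDR is %d)\n", (int)p->oper, (int)OP_ADDR);
    return 0;
}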
- src = child; + src = child; GenTreePtr dstAddr = fgGetStructAsStructPtr(dst); GenTreePtr srcAddr = fgGetStructAsStructPtr(src); - newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false); + newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false); } GenTreePtr production = gtNewLclvNode(tmpNum, structType); @@ -21197,15 +21291,17 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR assert(tree->gtOper == GT_ASG); // We have an assignment, we codegen only V05 = call(). - if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) + // However, if it is a multireg return on x64/ux we want to assign it to a temp. + if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR && !child->AsCall()->HasMultiRegRetVal()) { return; } GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1); - GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL) - ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. - : child); // Just get the address, if not a call. + GenTreePtr srcAddr = fgGetStructAsStructPtr( + (child->gtOper == GT_CALL) + ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call. + : child); // Just get the address, if not a call. tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this); } @@ -21217,16 +21313,15 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR */ /* static */ -Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree, fgWalkData* data) { - GenTreePtr tree = *pTree; - Compiler* comp = data->compiler; + GenTreePtr tree = *pTree; + Compiler* comp = data->compiler; CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE; if (tree->gtOper == GT_RET_EXPR) { - // We are going to copy the tree from the inlinee, + // We are going to copy the tree from the inlinee, // so record the handle now. 
// if (varTypeIsStruct(tree)) @@ -21242,7 +21337,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( #ifdef DEBUG if (comp->verbose) { - printf("\nReplacing the return expression placeholder "); + printf("\nReplacing the return expression placeholder "); printTreeID(tree); printf(" with "); printTreeID(inlineCandidate); @@ -21252,7 +21347,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( } #endif // DEBUG - tree->CopyFrom(inlineCandidate, comp); + tree->CopyFrom(inlineCandidate, comp); #ifdef DEBUG if (comp->verbose) @@ -21262,8 +21357,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( printf("\n"); } #endif // DEBUG - } - while (tree->gtOper == GT_RET_EXPR); + } while (tree->gtOper == GT_RET_EXPR); } #if FEATURE_MULTIREG_RET @@ -21305,15 +21399,12 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp2->gtOper == GT_COMMA)) { GenTreePtr comma; - for (comma = tree->gtOp.gtOp2; - comma->gtOper == GT_COMMA; - comma = comma->gtOp.gtOp2) + for (comma = tree->gtOp.gtOp2; comma->gtOper == GT_COMMA; comma = comma->gtOp.gtOp2) { // empty } - noway_assert(!varTypeIsStruct(comma) || - comma->gtOper != GT_RET_EXPR || + noway_assert(!varTypeIsStruct(comma) || comma->gtOper != GT_RET_EXPR || !comp->IsMultiRegReturnedType(comma->gtRetExpr.gtRetClsHnd)); } @@ -21330,8 +21421,7 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder( */ /* static */ -Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; if (tree->gtOper == GT_CALL) @@ -21348,9 +21438,7 @@ Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* p #endif // DEBUG - -void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, - InlineResult* inlineResult) +void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineResult) { noway_assert(call->gtOper == GT_CALL); noway_assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0); @@ -21393,92 +21481,95 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, // Set the trap to catch all errors (including recoverable ones from the EE) struct Param { - Compiler* pThis; - GenTree* call; + Compiler* pThis; + GenTree* call; CORINFO_METHOD_HANDLE fncHandle; - InlineCandidateInfo* inlineCandidateInfo; - InlineInfo* inlineInfo; + InlineCandidateInfo* inlineCandidateInfo; + InlineInfo* inlineInfo; } param = {nullptr}; - param.pThis = this; - param.call = call; - param.fncHandle = fncHandle; + param.pThis = this; + param.call = call; + param.fncHandle = fncHandle; param.inlineCandidateInfo = inlineCandidateInfo; - param.inlineInfo = &inlineInfo; - bool success = eeRunWithErrorTrap<Param>([](Param* pParam) - { - // Init the local var info of the inlinee - pParam->pThis->impInlineInitVars(pParam->inlineInfo); + param.inlineInfo = &inlineInfo; + bool success = eeRunWithErrorTrap<Param>( + [](Param* pParam) { + // Init the local var info of the inlinee + pParam->pThis->impInlineInitVars(pParam->inlineInfo); - if (pParam->inlineInfo->inlineResult->IsCandidate()) - { - /* Clear the temp table */ - memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum)); + if (pParam->inlineInfo->inlineResult->IsCandidate()) + { + /* Clear the temp table */ + memset(pParam->inlineInfo->lclTmpNum, -1, 
sizeof(pParam->inlineInfo->lclTmpNum)); - // - // Prepare the call to jitNativeCode - // + // + // Prepare the call to jitNativeCode + // - pParam->inlineInfo->InlinerCompiler = pParam->pThis; - if (pParam->pThis->impInlineInfo == nullptr) - { - pParam->inlineInfo->InlineRoot = pParam->pThis; - } - else - { - pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot; - } - pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs(); - pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd; + pParam->inlineInfo->InlinerCompiler = pParam->pThis; + if (pParam->pThis->impInlineInfo == nullptr) + { + pParam->inlineInfo->InlineRoot = pParam->pThis; + } + else + { + pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot; + } + pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs(); + pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd; - JITLOG_THIS(pParam->pThis, - (LL_INFO100000, - "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n", - pParam->pThis->eeGetMethodFullName(pParam->fncHandle), - pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle))); + JITLOG_THIS(pParam->pThis, + (LL_INFO100000, "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n", + pParam->pThis->eeGetMethodFullName(pParam->fncHandle), + pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle))); - CORJIT_FLAGS compileFlagsForInlinee; - memcpy(&compileFlagsForInlinee, pParam->pThis->opts.jitFlags, sizeof(compileFlagsForInlinee)); - compileFlagsForInlinee.corJitFlags &= ~CORJIT_FLG_LOST_WHEN_INLINING; - compileFlagsForInlinee.corJitFlags |= CORJIT_FLG_SKIP_VERIFICATION; + JitFlags compileFlagsForInlinee = *pParam->pThis->opts.jitFlags; + + // The following flags are lost when inlining. + // (This is checked in Compiler::compInitOptions().) + compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBOPT); + compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR); + compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE); + compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC); + compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_INFO); + + compileFlagsForInlinee.Set(JitFlags::JIT_FLAG_SKIP_VERIFICATION); #ifdef DEBUG - if (pParam->pThis->verbose) - { - printf("\nInvoking compiler for the inlinee method %s :\n", - pParam->pThis->eeGetMethodFullName(pParam->fncHandle)); - } + if (pParam->pThis->verbose) + { + printf("\nInvoking compiler for the inlinee method %s :\n", + pParam->pThis->eeGetMethodFullName(pParam->fncHandle)); + } #endif // DEBUG - int result = jitNativeCode(pParam->fncHandle, - pParam->inlineCandidateInfo->methInfo.scope, - pParam->pThis->info.compCompHnd, - &pParam->inlineCandidateInfo->methInfo, - (void**)pParam->inlineInfo, - nullptr, - &compileFlagsForInlinee, - pParam->inlineInfo); - - if (result != CORJIT_OK) - { - // If we haven't yet determined why this inline fails, use - // a catch-all something bad happened observation. 
- InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult; + int result = + jitNativeCode(pParam->fncHandle, pParam->inlineCandidateInfo->methInfo.scope, + pParam->pThis->info.compCompHnd, &pParam->inlineCandidateInfo->methInfo, + (void**)pParam->inlineInfo, nullptr, &compileFlagsForInlinee, pParam->inlineInfo); - if (!innerInlineResult->IsFailure()) + if (result != CORJIT_OK) { - innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE); + // If we haven't yet determined why this inline fails, use + // a catch-all something bad happened observation. + InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult; + + if (!innerInlineResult->IsFailure()) + { + innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE); + } } } - } - }, ¶m); + }, + ¶m); if (!success) { #ifdef DEBUG if (verbose) { - printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n", - eeGetMethodFullName(fncHandle)); + printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n", + eeGetMethodFullName(fncHandle)); } #endif // DEBUG @@ -21498,8 +21589,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, #ifdef DEBUG if (0 && verbose) { - printf("\nDone invoking compiler for the inlinee method %s\n", - eeGetMethodFullName(fncHandle)); + printf("\nDone invoking compiler for the inlinee method %s\n", eeGetMethodFullName(fncHandle)); } #endif // DEBUG @@ -21514,7 +21604,7 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, if (verbose) { printf("\nInlining failed because pInlineInfo->retExpr is not set in the inlinee method %s.\n", - eeGetMethodFullName(fncHandle)); + eeGetMethodFullName(fncHandle)); } #endif // DEBUG inlineResult->NoteFatal(InlineObservation::CALLEE_LACKS_RETURN); @@ -21526,7 +21616,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, // we defer the call to initClass() until inlining is completed in case it fails. If inlining succeeds, // we will call initClass(). if (!(info.compCompHnd->initClass(nullptr /* field */, fncHandle /* method */, - inlineCandidateInfo->exactContextHnd /* context */) & CORINFO_INITCLASS_INITIALIZED)) + inlineCandidateInfo->exactContextHnd /* context */) & + CORINFO_INITCLASS_INITIALIZED)) { inlineResult->NoteFatal(InlineObservation::CALLEE_CLASS_INIT_FAILURE); return; @@ -21545,11 +21636,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, if (verbose || fgPrintInlinedMethods) { - printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n", - eeGetMethodFullName(fncHandle), - inlineCandidateInfo->methInfo.ILCodeSize, - inlineDepth, - inlineResult->ReasonString()); + printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n", eeGetMethodFullName(fncHandle), + inlineCandidateInfo->methInfo.ILCodeSize, inlineDepth, inlineResult->ReasonString()); } if (verbose) @@ -21566,20 +21654,39 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, inlineResult->NoteSuccess(); } -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -// The inlining attempt cannot be failed starting from this point. -// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +//------------------------------------------------------------------------ +// fgInsertInlineeBlocks: incorporate statements for an inline into the +// root method. +// +// Arguments: +// inlineInfo -- info for the inline +// +// Notes: +// The inlining attempt cannot be failed once this method is called. 
+// +// Adds all inlinee statements, plus any glue statements needed +// either before or after the inlined call. +// +// Updates flow graph and assigns weights to inlinee +// blocks. Currently does not attempt to read IBC data for the +// inlinee. +// +// Updates relevant root method status flags (eg optMethodFlags) to +// include information from the inlinee. +// +// Marks newly added statements with an appropriate inline context. + void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) { - GenTreePtr iciCall = pInlineInfo->iciCall; - GenTreePtr iciStmt = pInlineInfo->iciStmt; - BasicBlock* iciBlock = pInlineInfo->iciBlock; + GenTreeCall* iciCall = pInlineInfo->iciCall; + GenTreeStmt* iciStmt = pInlineInfo->iciStmt; + BasicBlock* iciBlock = pInlineInfo->iciBlock; BasicBlock* block; // We can write better assert here. For example, we can check that // iciBlock contains iciStmt, which in turn contains iciCall. noway_assert(iciBlock->bbTreeList != nullptr); - noway_assert(iciStmt->gtStmt.gtStmtExpr != nullptr); + noway_assert(iciStmt->gtStmtExpr != nullptr); noway_assert(iciCall->gtOper == GT_CALL); #ifdef DEBUG @@ -21591,33 +21698,23 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) printf("\n\n----------- Statements (and blocks) added due to the inlining of call "); printTreeID(iciCall); printf(" -----------\n"); - // gtDispTree(iciStmt); } #endif // DEBUG - // // Create a new inline context and mark the inlined statements with it - // InlineContext* calleeContext = m_inlineStrategy->NewSuccess(pInlineInfo); - for (block = InlineeCompiler->fgFirstBB; - block != nullptr; - block = block->bbNext) + for (block = InlineeCompiler->fgFirstBB; block != nullptr; block = block->bbNext) { - for (GenTreeStmt* stmt = block->firstStmt(); - stmt; - stmt = stmt->gtNextStmt) + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { stmt->gtInlineContext = calleeContext; } } - // - // Prepend statements. - // - GenTreePtr stmtAfter; - stmtAfter = fgInlinePrependStatements(pInlineInfo); + // Prepend statements + GenTreePtr stmtAfter = fgInlinePrependStatements(pInlineInfo); #ifdef DEBUG if (verbose) @@ -21627,6 +21724,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) } #endif // DEBUG + BasicBlock* topBlock = iciBlock; + BasicBlock* bottomBlock = nullptr; + if (InlineeCompiler->fgBBcount == 1) { // When fgBBCount is 1 we will always have a non-NULL fgFirstBB @@ -21641,22 +21741,21 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // Inlinee contains just one BB. So just insert its statement list to topBlock. if (InlineeCompiler->fgFirstBB->bbTreeList) { - stmtAfter = fgInsertStmtListAfter(iciBlock, - stmtAfter, - InlineeCompiler->fgFirstBB->bbTreeList); + stmtAfter = fgInsertStmtListAfter(iciBlock, stmtAfter, InlineeCompiler->fgFirstBB->bbTreeList); // Copy inlinee bbFlags to caller bbFlags. 
- const unsigned int inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags; + const unsigned __int64 inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags; noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0); noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0); iciBlock->bbFlags |= inlineeBlockFlags; } + #ifdef DEBUG if (verbose) { noway_assert(currentDumpStmt); - if (currentDumpStmt != stmtAfter) + if (currentDumpStmt != stmtAfter) { do { @@ -21669,10 +21768,14 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) gtDispTree(currentDumpStmt); printf("\n"); - } while (currentDumpStmt != stmtAfter); + } while (currentDumpStmt != stmtAfter); } } #endif // DEBUG + + // Append statements to unpin, if necessary. + fgInlineAppendStatements(pInlineInfo, iciBlock, stmtAfter); + goto _Done; } } @@ -21681,24 +21784,20 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // ======= Inserting inlinee's basic blocks =============== // - BasicBlock* topBlock; - BasicBlock* bottomBlock; - - topBlock = iciBlock; - - bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true); - bottomBlock->bbRefs = 1; + bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true); + bottomBlock->bbRefs = 1; bottomBlock->bbJumpDest = topBlock->bbJumpDest; bottomBlock->inheritWeight(topBlock); topBlock->bbJumpKind = BBJ_NONE; // Update block flags - unsigned originalFlags; - originalFlags = topBlock->bbFlags; - noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0); - topBlock->bbFlags &= ~(BBF_SPLIT_LOST); - bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED; + { + const unsigned __int64 originalFlags = topBlock->bbFlags; + noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0); + topBlock->bbFlags &= ~(BBF_SPLIT_LOST); + bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED; + } // // Split statements between topBlock and bottomBlock @@ -21708,10 +21807,10 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) GenTreePtr bottomBlock_Begin; GenTreePtr bottomBlock_End; - topBlock_Begin = nullptr; - topBlock_End = nullptr; + topBlock_Begin = nullptr; + topBlock_End = nullptr; bottomBlock_Begin = nullptr; - bottomBlock_End = nullptr; + bottomBlock_End = nullptr; // // First figure out bottomBlock_Begin @@ -21724,7 +21823,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // topBlock is empty before the split. // In this case, both topBlock and bottomBlock should be empty noway_assert(bottomBlock_Begin == nullptr); - topBlock->bbTreeList = nullptr; + topBlock->bbTreeList = nullptr; bottomBlock->bbTreeList = nullptr; } else if (topBlock->bbTreeList == bottomBlock_Begin) @@ -21735,7 +21834,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // And the split is before the first statement. // In this case, topBlock should be empty, and everything else should be moved to the bottonBlock. bottomBlock->bbTreeList = topBlock->bbTreeList; - topBlock->bbTreeList = nullptr; + topBlock->bbTreeList = nullptr; } else if (bottomBlock_Begin == nullptr) { @@ -21753,9 +21852,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) noway_assert(bottomBlock_Begin); // This is the normal case where both blocks should contain at least one statement. 
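When the inlinee has more than one block, the call site's block is split in two around the inlined call and the inlinee's blocks are threaded between the halves; statements up to the last one inserted for the call stay in the top block and the remainder moves to the bottom block. A rough sketch of that split over a plain singly linked statement list, a stand-in for bbTreeList (which is really doubly linked with a circular gtPrev chain):

#include <cstdio>

// Stand-in statement list.
struct Stmt
{
    int   id;
    Stmt* next;
};

// Everything up to and including 'splitStmt' stays in the top half; the rest
// becomes the bottom half. The inlinee's blocks are then threaded in between.
void splitAfter(Stmt* head, Stmt* splitStmt, Stmt** topOut, Stmt** bottomOut)
{
    *topOut         = head;
    *bottomOut      = splitStmt->next;
    splitStmt->next = nullptr;
}

int main()
{
    Stmt s3 = {3, nullptr};
    Stmt s2 = {2, &s3};
    Stmt s1 = {1, &s2};

    Stmt* top;
    Stmt* bottom;
    splitAfter(&s1, &s2, &top, &bottom); // s2 plays the role of the last call-site statement

    printf("top:");
    for (Stmt* s = top; s != nullptr; s = s->next)
    {
        printf(" %d", s->id);
    }
    printf("\nbottom:");
    for (Stmt* s = bottom; s != nullptr; s = s->next)
    {
        printf(" %d", s->id);
    }
    printf("\n");
    return 0;
}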
- topBlock_Begin = topBlock->bbTreeList; + topBlock_Begin = topBlock->bbTreeList; noway_assert(topBlock_Begin); - topBlock_End = bottomBlock_Begin->gtPrev; + topBlock_End = bottomBlock_Begin->gtPrev; noway_assert(topBlock_End); bottomBlock_End = topBlock->lastStmt(); noway_assert(bottomBlock_End); @@ -21778,25 +21877,23 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) bool inheritWeight; inheritWeight = true; // The firstBB does inherit the weight from the iciBlock - for (block = InlineeCompiler->fgFirstBB; - block != nullptr; - block = block->bbNext) + for (block = InlineeCompiler->fgFirstBB; block != nullptr; block = block->bbNext) { noway_assert(!block->hasTryIndex()); noway_assert(!block->hasHndIndex()); block->copyEHRegion(iciBlock); - block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP; + block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP; - if (iciStmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) + if (iciStmt->gtStmtILoffsx != BAD_IL_OFFSET) { - block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmt.gtStmtILoffsx); - block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining? + block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmtILoffsx); + block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining? } else { - block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET? - block->bbCodeOffsEnd = 0; - block->bbFlags |= BBF_INTERNAL; + block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET? + block->bbCodeOffsEnd = 0; + block->bbFlags |= BBF_INTERNAL; } if (block->bbJumpKind == BBJ_RETURN) @@ -21810,8 +21907,8 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) #ifdef DEBUG if (verbose) { - printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n", - block->bbNum, bottomBlock->bbNum); + printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n", block->bbNum, + bottomBlock->bbNum); } #endif // DEBUG } @@ -21846,6 +21943,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) // fgBBcount += InlineeCompiler->fgBBcount; + // Append statements to unpin if necessary. 
+ fgInlineAppendStatements(pInlineInfo, bottomBlock, nullptr); + #ifdef DEBUG if (verbose) { @@ -21862,15 +21962,18 @@ _Done: // // Copy out some flags // - compLongUsed |= InlineeCompiler->compLongUsed; - compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed; - compLocallocUsed |= InlineeCompiler->compLocallocUsed; - compQmarkUsed |= InlineeCompiler->compQmarkUsed; - compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed; + compLongUsed |= InlineeCompiler->compLongUsed; + compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed; + compLocallocUsed |= InlineeCompiler->compLocallocUsed; + compQmarkUsed |= InlineeCompiler->compQmarkUsed; + compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed; compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie; - compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout; + compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout; + + // Update unmanaged call count + info.compCallUnmanaged += InlineeCompiler->info.compCallUnmanaged; - // Update optMethodFlags +// Update optMethodFlags #ifdef DEBUG unsigned optMethodFlagsBefore = optMethodFlags; @@ -21881,8 +21984,8 @@ _Done: #ifdef DEBUG if (optMethodFlags != optMethodFlagsBefore) { - JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n", - optMethodFlagsBefore, InlineeCompiler->optMethodFlags, optMethodFlags); + JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n", optMethodFlagsBefore, + InlineeCompiler->optMethodFlags, optMethodFlags); } #endif @@ -21908,24 +22011,41 @@ _Done: // Detach the GT_CALL node from the original statement by hanging a "nothing" node under it, // so that fgMorphStmts can remove the statement once we return from here. // - iciStmt->gtStmt.gtStmtExpr = gtNewNothingNode(); + iciStmt->gtStmtExpr = gtNewNothingNode(); } -// Prepend the statements that are needed before the inlined call. -// Return the last statement that is prepended. +//------------------------------------------------------------------------ +// fgInlinePrependStatements: prepend statements needed to match up +// caller and inlined callee +// +// Arguments: +// inlineInfo -- info for the inline +// +// Return Value: +// The last statement that was added, or the original call if no +// statements were added. +// +// Notes: +// Statements prepended may include the following: +// * This pointer null check +// * Class initialization +// * Zeroing of must-init locals in the callee +// * Passing of call arguments via temps +// +// Newly added statements are placed just after the original call +// and are are given the same inline context as the call any calls +// added here will appear to have been part of the immediate caller. -GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) +GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { - BasicBlock* block = inlineInfo->iciBlock; + BasicBlock* block = inlineInfo->iciBlock; + GenTreeStmt* callStmt = inlineInfo->iciStmt; + IL_OFFSETX callILOffset = callStmt->gtStmtILoffsx; + GenTreeStmt* postStmt = callStmt->gtNextStmt; + GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after. 
+ GenTreePtr newStmt = nullptr; + GenTreePtr call = inlineInfo->iciCall; - GenTreePtr callStmt = inlineInfo->iciStmt; - noway_assert(callStmt->gtOper == GT_STMT); - IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx; - - GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after. - GenTreePtr newStmt; - - GenTreePtr call = inlineInfo->iciCall; noway_assert(call->gtOper == GT_CALL); #ifdef DEBUG @@ -21939,12 +22059,13 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) // Prepend statements for any initialization / side effects - InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo; - InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo; + InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo; + InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo; GenTreePtr tree; - // Create the null check statement (but not appending it to the statement list yet) for the 'this' pointer if necessary. + // Create the null check statement (but not appending it to the statement list yet) for the 'this' pointer if + // necessary. // The NULL check should be done after "argument setup statements". // The only reason we move it here is for calling "impInlineFetchArg(0,..." to reserve a temp // for the "this" pointer. @@ -21956,8 +22077,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (call->gtFlags & GTF_CALL_NULLCHECK && !inlineInfo->thisDereferencedFirst) { // Call impInlineFetchArg to "reserve" a temp for the "this" pointer. - nullcheck = gtNewOperNode(GT_IND, TYP_INT, - impInlineFetchArg(0, inlArgInfo, lclVarInfo)); + nullcheck = gtNewOperNode(GT_IND, TYP_INT, impInlineFetchArg(0, inlArgInfo, lclVarInfo)); nullcheck->gtFlags |= GTF_EXCEPT; // The NULL-check statement will be inserted to the statement list after those statements @@ -21995,10 +22115,8 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode; - if (argSingleUseNode && - !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && - !inlArgInfo[argNum].argHasLdargaOp && - !inlArgInfo[argNum].argHasStargOp) + if (argSingleUseNode && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && + !inlArgInfo[argNum].argHasLdargaOp && !inlArgInfo[argNum].argHasStargOp) { // Change the temp in-place to the actual argument. // We currently do not support this for struct arguments, so it must not be a GT_OBJ. @@ -22019,15 +22137,12 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) noway_assert(structHnd != NO_CLASS_HANDLE); } - // Unsafe value cls check is not needed for argTmpNum here since in-linee compiler instance would have - // iterated over these and marked them accordingly. - impAssignTempGen(inlArgInfo[argNum].argTmpNum, - inlArgInfo[argNum].argNode, - structHnd, - (unsigned)CHECK_SPILL_NONE, - & afterStmt, - callILOffset, - block); + // Unsafe value cls check is not needed for + // argTmpNum here since in-linee compiler instance + // would have iterated over these and marked them + // accordingly. 
+ impAssignTempGen(inlArgInfo[argNum].argTmpNum, inlArgInfo[argNum].argNode, structHnd, + (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block); #ifdef DEBUG if (verbose) @@ -22035,7 +22150,6 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) gtDispTree(afterStmt); } #endif // DEBUG - } } else if (inlArgInfo[argNum].argIsByRefToStructLocal) @@ -22046,19 +22160,18 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { /* The argument is either not used or a const or lcl var */ - noway_assert(!inlArgInfo[argNum].argIsUsed || - inlArgInfo[argNum].argIsInvariant || - inlArgInfo[argNum].argIsLclVar ); + noway_assert(!inlArgInfo[argNum].argIsUsed || inlArgInfo[argNum].argIsInvariant || + inlArgInfo[argNum].argIsLclVar); /* Make sure we didnt change argNode's along the way, or else subsequent uses of the arg would have worked with the bashed value */ if (inlArgInfo[argNum].argIsInvariant) { - assert(inlArgInfo[argNum].argNode->OperIsConst() || - inlArgInfo[argNum].argNode->gtOper == GT_ADDR); + assert(inlArgInfo[argNum].argNode->OperIsConst() || inlArgInfo[argNum].argNode->gtOper == GT_ADDR); } noway_assert((inlArgInfo[argNum].argIsLclVar == 0) == - (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR || (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF))); + (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR || + (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF))); /* If the argument has side effects, append it */ @@ -22086,7 +22199,6 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) gtDispTree(afterStmt); } #endif // DEBUG - } } } @@ -22101,7 +22213,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (inlineInfo->inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_USE_HELPER) { CORINFO_CONTEXT_HANDLE exactContext = inlineInfo->inlineCandidateInfo->exactContextHnd; - CORINFO_CLASS_HANDLE exactClass; + CORINFO_CLASS_HANDLE exactClass; if (((SIZE_T)exactContext & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS) { @@ -22109,18 +22221,19 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) } else { - exactClass = info.compCompHnd->getMethodClass(CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK)); + exactClass = info.compCompHnd->getMethodClass( + CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK)); } - tree = fgGetSharedCCtor(exactClass); - newStmt = gtNewStmt(tree, callILOffset); + tree = fgGetSharedCCtor(exactClass); + newStmt = gtNewStmt(tree, callILOffset); afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt); } // Insert the nullcheck statement now. if (nullcheck) { - newStmt = gtNewStmt(nullcheck, callILOffset); + newStmt = gtNewStmt(nullcheck, callILOffset); afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt); } @@ -22133,8 +22246,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) unsigned lclCnt = InlineeMethodInfo->locals.numArgs; // Does callee contain any zero-init local? - if ((lclCnt != 0) && - (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0) + if ((lclCnt != 0) && (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0) { #ifdef DEBUG @@ -22146,7 +22258,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++) { - unsigned tmpNum = inlineInfo->lclTmpNum[lclNum]; + unsigned tmpNum = inlineInfo->lclTmpNum[lclNum]; // Is the local used at all? 
if (tmpNum != BAD_VAR_NUM) @@ -22158,25 +22270,21 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) { // Unsafe value cls check is not needed here since in-linee compiler instance would have // iterated over locals and marked accordingly. - impAssignTempGen(tmpNum, - gtNewZeroConNode(genActualType(lclTyp)), - NO_CLASS_HANDLE, - (unsigned)CHECK_SPILL_NONE, - & afterStmt, - callILOffset, - block); + impAssignTempGen(tmpNum, gtNewZeroConNode(genActualType(lclTyp)), NO_CLASS_HANDLE, + (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block); } else { - CORINFO_CLASS_HANDLE structType = lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(); + CORINFO_CLASS_HANDLE structType = + lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(); - tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest - gtNewIconNode(0), // Value + tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest + gtNewIconNode(0), // Value info.compCompHnd->getClassSize(structType), // Size - false, // isVolatile - false); // not copyBlock + false, // isVolatile + false); // not copyBlock - newStmt = gtNewStmt(tree, callILOffset); + newStmt = gtNewStmt(tree, callILOffset); afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt); } @@ -22190,14 +22298,102 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) } } + // Update any newly added statements with the appropriate context. + InlineContext* context = callStmt->gtInlineContext; + assert(context != nullptr); + for (GenTreeStmt* addedStmt = callStmt->gtNextStmt; addedStmt != postStmt; addedStmt = addedStmt->gtNextStmt) + { + assert(addedStmt->gtInlineContext == nullptr); + addedStmt->gtInlineContext = context; + } + return afterStmt; } +//------------------------------------------------------------------------ +// fgInlineAppendStatements: Append statements that are needed +// after the inlined call. +// +// Arguments: +// inlineInfo - information about the inline +// block - basic block for the new statements +// stmtAfter - (optional) insertion point for mid-block cases + +void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmtAfter) +{ + // Null out any inline pinned locals + if (!inlineInfo->hasPinnedLocals) + { + // No pins, nothing to do + return; + } + + JITDUMP("Unpin inlinee locals:\n"); + + GenTreePtr callStmt = inlineInfo->iciStmt; + IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx; + CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo; + unsigned lclCnt = InlineeMethodInfo->locals.numArgs; + InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo; + + noway_assert(callStmt->gtOper == GT_STMT); + + for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++) + { + unsigned tmpNum = inlineInfo->lclTmpNum[lclNum]; + + // Is the local used at all? + if (tmpNum == BAD_VAR_NUM) + { + // Nope, nothing to unpin. + continue; + } + + // Is the local pinned? + if (!lvaTable[tmpNum].lvPinned) + { + // Nope, nothing to unpin. + continue; + } + + // Does the local we're about to unpin appear in the return + // expression? If so we somehow messed up and didn't properly + // spill the return value. See impInlineFetchLocal. + GenTreePtr retExpr = inlineInfo->retExpr; + if (retExpr != nullptr) + { + const bool interferesWithReturn = gtHasRef(inlineInfo->retExpr, tmpNum, false); + noway_assert(!interferesWithReturn); + } + + // Emit the unpin, by assigning null to the local. 
+ var_types lclTyp = (var_types)lvaTable[tmpNum].lvType; + noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo); + noway_assert(!varTypeIsStruct(lclTyp)); + GenTreePtr unpinExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(genActualType(lclTyp))); + GenTreePtr unpinStmt = gtNewStmt(unpinExpr, callILOffset); + + if (stmtAfter == nullptr) + { + stmtAfter = fgInsertStmtAtBeg(block, unpinStmt); + } + else + { + stmtAfter = fgInsertStmtAfter(block, stmtAfter, unpinStmt); + } + +#ifdef DEBUG + if (verbose) + { + gtDispTree(unpinStmt); + } +#endif // DEBUG + } +} /*****************************************************************************/ /*static*/ -Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; @@ -22210,28 +22406,30 @@ Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree, switch (tree->gtOper) { - case GT_MUL: - case GT_ADD: - case GT_SUB: - case GT_ASG_ADD: - case GT_ASG_SUB: - case GT_CAST: - if (tree->gtOverflow()) { - return Compiler::WALK_ABORT; -} - break; + case GT_MUL: + case GT_ADD: + case GT_SUB: + case GT_ASG_ADD: + case GT_ASG_SUB: + case GT_CAST: + if (tree->gtOverflow()) + { + return Compiler::WALK_ABORT; + } + break; - case GT_INDEX: - if (tree->gtFlags & GTF_INX_RNGCHK) { - return Compiler::WALK_ABORT; -} - break; + case GT_INDEX: + if (tree->gtFlags & GTF_INX_RNGCHK) + { + return Compiler::WALK_ABORT; + } + break; - case GT_ARR_BOUNDS_CHECK: - return Compiler::WALK_ABORT; + case GT_ARR_BOUNDS_CHECK: + return Compiler::WALK_ABORT; - default: - break; + default: + break; } return Compiler::WALK_CONTINUE; @@ -22239,33 +22437,32 @@ Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree, /*****************************************************************************/ /*static*/ -Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; - if (tree->gtOper == GT_LCLHEAP) { + if (tree->gtOper == GT_LCLHEAP) + { return Compiler::WALK_ABORT; -} + } return Compiler::WALK_CONTINUE; } /*****************************************************************************/ /*static*/ -Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree, - fgWalkData* data) +Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree, fgWalkData* data) { GenTreePtr tree = *pTree; - if (tree->gtOper == GT_QMARK) { + if (tree->gtOper == GT_QMARK) + { return Compiler::WALK_ABORT; -} + } return Compiler::WALK_CONTINUE; } - void Compiler::fgLclFldAssign(unsigned lclNum) { assert(varTypeIsStruct(lvaTable[lclNum].lvType)); diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp index f20183b25a..128fc4addb 100644 --- a/src/jit/gcencode.cpp +++ b/src/jit/gcencode.cpp @@ -23,6 +23,89 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfotypes.h" +ReturnKind GCTypeToReturnKind(CorInfoGCType gcType) +{ + switch (gcType) + { + case TYPE_GC_NONE: + return RT_Scalar; + case TYPE_GC_REF: + return RT_Object; + case TYPE_GC_BYREF: + return RT_ByRef; + default: + _ASSERTE(!"TYP_GC_OTHER is unexpected"); + return RT_Illegal; + } +} + +ReturnKind GCInfo::getReturnKind() +{ + switch (compiler->info.compRetType) + { + case TYP_REF: + case TYP_ARRAY: + return RT_Object; + case TYP_BYREF: + return RT_ByRef; + case TYP_STRUCT: + { + 
CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass; + var_types retType = compiler->getReturnTypeForStruct(structType); + + switch (retType) + { + case TYP_ARRAY: + _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()"); + // fall through + case TYP_REF: + return RT_Object; + + case TYP_BYREF: + return RT_ByRef; + + case TYP_STRUCT: + if (compiler->IsHfa(structType)) + { +#ifdef _TARGET_X86_ + _ASSERTE(false && "HFAs not expected for X86"); +#endif // _TARGET_X86_ + + return RT_Scalar; + } + else + { + // Multi-reg return + BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE}; + compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs); + + ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]); + ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]); + + return GetStructReturnKind(first, second); + } + +#ifdef _TARGET_X86_ + case TYP_FLOAT: + case TYP_DOUBLE: + return RT_Float; +#endif // _TARGET_X86_ + default: + return RT_Scalar; + } + } + +#ifdef _TARGET_X86_ + case TYP_FLOAT: + case TYP_DOUBLE: + return RT_Float; +#endif // _TARGET_X86_ + + default: + return RT_Scalar; + } +} + #ifdef JIT32_GCENCODER #include "emit.h" @@ -104,18 +187,21 @@ static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state) fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d," " %1d, %1d, %1d, %1d, %1d," - " %1d, %1d, %1d, %1d, %1d," - " %1d, %2d, %2d, %2d, %2d," - " %2d, %2d), \n", + " %1d, %1d, %1d, %1d, %1d, %1d," + " %1d, %1d, %1d," + " %1d, %2d, %2d," + " %2d, %2d, %2d, %2d, %2d, %2d), \n", state->prologSize, state->epilogSize, state->epilogCount, state->epilogAtEnd, state->ediSaved, state->esiSaved, state->ebxSaved, state->ebpSaved, state->ebpFrame, state->interruptible, state->doubleAlign, state->security, state->handlers, state->localloc, state->editNcontinue, state->varargs, - state->profCallbacks, state->argCount, state->frameSize, + state->profCallbacks, state->genericsContext, state->genericsContextIsMethodDesc, state->returnKind, + state->argCount, state->frameSize, (state->untrackedCnt <= SET_UNTRACKED_MAX) ? state->untrackedCnt : HAS_UNTRACKED, (state->varPtrTableSize == 0) ? 0 : HAS_VARPTR, (state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET) ? 0 : HAS_GS_COOKIE_OFFSET, (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET, - (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET); + (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET, + (state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET) ? 0 : HAS_REV_PINVOKE_FRAME_OFFSET); fflush(logFile); @@ -265,9 +351,11 @@ static int bigEncoding4(unsigned cur, unsigned tgt, unsigned max) return cnt; } -BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state) +BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state, BYTE& codeSet) { BYTE encoding = 0xff; + codeSet = 1; // codeSet is 1 or 2, depending on whether the returned encoding + // corresponds to InfoHdrAdjust, or InfoHdrAdjust2 enumerations. 
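getReturnKind, shown above, classifies the method's return value for the GC encoder; for a multi-register struct return it fetches the GC layout of the two return slots, maps each slot through GCTypeToReturnKind, and folds the pair with GetStructReturnKind. A small sketch of the per-slot mapping using stand-in enums; the real enums and the folding helper live in the GC info headers and are not reproduced here.

#include <cstdio>

// Stand-in enums mirroring the idea of CorInfoGCType and ReturnKind.
enum GcSlotType
{
    SLOT_NONE,
    SLOT_REF,
    SLOT_BYREF
};

enum Kind
{
    KIND_SCALAR,
    KIND_OBJECT,
    KIND_BYREF
};

// Maps one return slot's GC classification to a return kind.
Kind slotToKind(GcSlotType t)
{
    switch (t)
    {
        case SLOT_REF:
            return KIND_OBJECT;
        case SLOT_BYREF:
            return KIND_BYREF;
        default:
            return KIND_SCALAR;
    }
}

int main()
{
    // A two-slot struct return, e.g. { object reference, native int }.
    GcSlotType slots[2] = {SLOT_REF, SLOT_NONE};
    Kind first  = slotToKind(slots[0]);
    Kind second = slotToKind(slots[1]);
    printf("first=%d second=%d\n", (int)first, (int)second);
    // The two per-slot kinds are then folded into a single struct return kind
    // by a helper analogous to GetStructReturnKind(first, second).
    return 0;
}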
if (state->argCount != header.argCount) { @@ -547,6 +635,15 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state) goto DO_RETURN; } + if (GCInfoEncodesReturnKind() && (state->returnKind != header.returnKind)) + { + state->returnKind = header.returnKind; + codeSet = 2; // Two byte encoding + encoding = header.returnKind; + _ASSERTE(encoding < SET_RET_KIND_MAX); + goto DO_RETURN; + } + if (state->gsCookieOffset != header.gsCookieOffset) { assert(state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET || state->gsCookieOffset == HAS_GS_COOKIE_OFFSET); @@ -587,10 +684,31 @@ BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state) } } + if (GCInfoEncodesRevPInvokeFrame() && (state->revPInvokeOffset != header.revPInvokeOffset)) + { + assert(state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET || + state->revPInvokeOffset == HAS_REV_PINVOKE_FRAME_OFFSET); + + if (state->revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET) + { + // header.revPInvokeOffset is non-zero. + state->revPInvokeOffset = HAS_REV_PINVOKE_FRAME_OFFSET; + encoding = FLIP_REV_PINVOKE_FRAME; + goto DO_RETURN; + } + else if (header.revPInvokeOffset == INVALID_REV_PINVOKE_OFFSET) + { + state->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET; + encoding = FLIP_REV_PINVOKE_FRAME; + goto DO_RETURN; + } + } + DO_RETURN: - assert(encoding < 0x80); + _ASSERTE(encoding < MORE_BYTES_TO_FOLLOW); if (!state->isHeaderMatch(header)) - encoding |= 0x80; + encoding |= MORE_BYTES_TO_FOLLOW; + return encoding; } @@ -806,6 +924,14 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo return distance; } + if (p->returnKind != header.returnKind) + { + // Setting the ReturnKind requires two bytes of encoding. + distance += 2; + if (distance >= closeness) + return distance; + } + if (header.gsCookieOffset != INVALID_GS_COOKIE_OFFSET) { distance += 1; @@ -820,6 +946,13 @@ static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int clo return distance; } + if (header.revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET) + { + distance += 1; + if (distance >= closeness) + return distance; + } + return distance; } @@ -1164,6 +1297,16 @@ size_t GCInfo::gcInfoBlockHdrSave( header->genericsContext = compiler->lvaReportParamTypeArg(); header->genericsContextIsMethodDesc = header->genericsContext && (compiler->info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC)); + + if (GCInfoEncodesReturnKind()) + { + ReturnKind returnKind = getReturnKind(); + _ASSERTE(IsValidReturnKind(returnKind) && "Return Kind must be valid"); + _ASSERTE(!IsStructReturnKind(returnKind) && "Struct Return Kinds Unexpected for JIT32"); + _ASSERTE(((int)returnKind < (int)SET_RET_KIND_MAX) && "ReturnKind has no legal encoding"); + header->returnKind = returnKind; + } + header->gsCookieOffset = INVALID_GS_COOKIE_OFFSET; if (compiler->getNeedsGSSecurityCookie()) { @@ -1190,6 +1333,8 @@ size_t GCInfo::gcInfoBlockHdrSave( assert(header->epilogCount <= 1); } + header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET; + assert((compiler->compArgSize & 0x3) == 0); size_t argCount = @@ -1224,12 +1369,21 @@ size_t GCInfo::gcInfoBlockHdrSave( *dest++ = headerEncoding; BYTE encoding = headerEncoding; - while (encoding & 0x80) + BYTE codeSet = 1; + while (encoding & MORE_BYTES_TO_FOLLOW) { - encoding = encodeHeaderNext(*header, &state); + encoding = encodeHeaderNext(*header, &state, codeSet); + #if REGEN_SHORTCUTS regenLog(headerEncoding, header, &state); #endif + _ASSERTE(codeSet == 1 || codeSet == 2 && "Encoding must 
correspond to InfoHdrAdjust or InfoHdrAdjust2"); + if (codeSet == 2) + { + *dest++ = NEXT_OPCODE | MORE_BYTES_TO_FOLLOW; + ++size; + } + *dest++ = encoding; ++size; } @@ -1771,12 +1925,12 @@ size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, un } else { - /* Stack-passed arguments which are not enregistered - * are always reported in this "untracked stack - * pointers" section of the GC info even if lvTracked==true - */ +/* Stack-passed arguments which are not enregistered + * are always reported in this "untracked stack + * pointers" section of the GC info even if lvTracked==true + */ - /* Has this argument been enregistered? */ +/* Has this argument been enregistered? */ #ifndef LEGACY_BACKEND if (!varDsc->lvOnFrame) #else // LEGACY_BACKEND @@ -3277,7 +3431,7 @@ void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, uns GCDump gcDump(GCINFO_VERSION); gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM) - gcDump.DumpPtrsInFrame((const BYTE*)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables); + gcDump.DumpPtrsInFrame((PTR_CBYTE)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables); } #endif // DUMP_GC_TABLES @@ -3504,23 +3658,6 @@ public: #endif // DEBUG -ReturnKind GCTypeToReturnKind(CorInfoGCType gcType) -{ - - switch (gcType) - { - case TYPE_GC_NONE: - return RT_Scalar; - case TYPE_GC_REF: - return RT_Object; - case TYPE_GC_BYREF: - return RT_ByRef; - default: - _ASSERTE(!"TYP_GC_OTHER is unexpected"); - return RT_Illegal; - } -} - void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize) { #ifdef DEBUG @@ -3536,65 +3673,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz gcInfoEncoderWithLog->SetCodeLength(methodSize); - ReturnKind returnKind = RT_Illegal; - - switch (compiler->info.compRetType) - { - case TYP_REF: - case TYP_ARRAY: - returnKind = RT_Object; - break; - case TYP_BYREF: - returnKind = RT_ByRef; - break; - case TYP_STRUCT: - { - CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass; - var_types retType = compiler->getReturnTypeForStruct(structType); - - switch (retType) - { - case TYP_ARRAY: - _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()"); - - case TYP_REF: - returnKind = RT_Object; - break; - - case TYP_BYREF: - returnKind = RT_ByRef; - break; - - case TYP_STRUCT: - if (compiler->IsHfa(structType)) - { - returnKind = RT_Scalar; - } - else - { - // Multi-reg return - BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE }; - compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs); - - ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]); - ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]); - - returnKind = GetStructReturnKind(first, second); - } - break; - - default: - returnKind = RT_Scalar; - break; - } - break; - } - default: - returnKind = RT_Scalar; - } - - _ASSERTE(returnKind != RT_Illegal); - gcInfoEncoderWithLog->SetReturnKind(returnKind); + gcInfoEncoderWithLog->SetReturnKind(getReturnKind()); if (compiler->isFramePointerUsed()) { @@ -3682,10 +3761,8 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz } #if FEATURE_EH_FUNCLETS - if (compiler->ehNeedsPSPSym()) + if (compiler->lvaPSPSym != BAD_VAR_NUM) { - assert(compiler->lvaPSPSym != BAD_VAR_NUM); - #ifdef _TARGET_AMD64_ // The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms. 
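The encoding loop above emits one adjustment byte per header field that differs from the canned header, sets a continuation bit (MORE_BYTES_TO_FOLLOW) while more differences remain, and prefixes a NEXT_OPCODE escape byte when the adjustment comes from the second opcode page (codeSet == 2). A simplified sketch of that byte-stream scheme, with made-up opcode values rather than the real InfoHdrAdjust constants:

#include <cstdio>
#include <vector>

// Hypothetical constants in the spirit of the JIT32 header encoding: the top
// bit of a byte says "more adjustment bytes follow", and one opcode value
// escapes to a second page of adjustment opcodes.
const unsigned char MORE_BYTES = 0x80;
const unsigned char NEXT_PAGE  = 0x11; // escape: the next byte is a page-2 opcode

struct Adjustment
{
    unsigned char opcode;
    bool          fromSecondPage;
};

// Emit the adjustments as a byte stream terminated by a byte without MORE_BYTES.
static std::vector<unsigned char> EncodeAdjustments(const std::vector<Adjustment>& adj)
{
    std::vector<unsigned char> out;
    for (size_t i = 0; i < adj.size(); i++)
    {
        bool last = (i + 1 == adj.size());
        if (adj[i].fromSecondPage)
        {
            // The escape byte itself always has more bytes following it.
            out.push_back(NEXT_PAGE | MORE_BYTES);
        }
        unsigned char b = adj[i].opcode;
        if (!last)
        {
            b |= MORE_BYTES;
        }
        out.push_back(b);
    }
    return out;
}

int main()
{
    std::vector<Adjustment> adj = {{0x05, false}, {0x02, true}, {0x0a, false}};
    for (unsigned char b : EncodeAdjustments(adj))
    {
        printf("%02x ", b);
    }
    printf("\n");
    return 0;
}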
gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 67474e11ec..4a6cc740c6 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -21,7 +21,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ const unsigned short GenTree::gtOperKindTable[] = { -#define GTNODE(en, sn, cm, ok) ok + GTK_COMMUTE *cm, +#define GTNODE(en, sn, st, cm, ok) ok + GTK_COMMUTE *cm, #include "gtlist.h" }; @@ -209,7 +209,7 @@ static void printIndent(IndentStack* indentStack) } static const char* nodeNames[] = { -#define GTNODE(en, sn, cm, ok) sn, +#define GTNODE(en, sn, st, cm, ok) sn, #include "gtlist.h" }; @@ -220,8 +220,12 @@ const char* GenTree::NodeName(genTreeOps op) return nodeNames[op]; } +#endif + +#if defined(DEBUG) || NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS + static const char* opNames[] = { -#define GTNODE(en, sn, cm, ok) #en, +#define GTNODE(en, sn, st, cm, ok) #en, #include "gtlist.h" }; @@ -234,12 +238,27 @@ const char* GenTree::OpName(genTreeOps op) #endif +#if MEASURE_NODE_SIZE && SMALL_TREE_NODES + +static const char* opStructNames[] = { +#define GTNODE(en, sn, st, cm, ok) #st, +#include "gtlist.h" +}; + +const char* GenTree::OpStructName(genTreeOps op) +{ + assert((unsigned)op < sizeof(opStructNames) / sizeof(opStructNames[0])); + + return opStructNames[op]; +} + +#endif + /***************************************************************************** * * When 'SMALL_TREE_NODES' is enabled, we allocate tree nodes in 2 different - * sizes: 'GTF_DEBUG_NODE_SMALL' for most nodes and 'GTF_DEBUG_NODE_LARGE' for - * the few nodes (such as calls and statement list nodes) that have more fields - * and take up a lot more space. + * sizes: 'TREE_NODE_SZ_SMALL' for most nodes and 'TREE_NODE_SZ_LARGE' for the + * few nodes (such as calls) that have more fields and take up a lot more space. */ #if SMALL_TREE_NODES @@ -248,6 +267,19 @@ const char* GenTree::OpName(genTreeOps op) /* static */ unsigned char GenTree::s_gtNodeSizes[GT_COUNT + 1]; +#if NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS + +unsigned char GenTree::s_gtTrueSizes[GT_COUNT + 1]{ +#define GTNODE(en, sn, st, cm, ok) sizeof(st), +#include "gtlist.h" +}; + +#endif // NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS + +#if COUNT_AST_OPERS +LONG GenTree::s_gtNodeCounts[GT_COUNT + 1] = {0}; +#endif // COUNT_AST_OPERS + /* static */ void GenTree::InitNodeSize() { @@ -265,12 +297,13 @@ void GenTree::InitNodeSize() // Now set all of the appropriate entries to 'large' CLANG_FORMAT_COMMENT_ANCHOR; +// clang-format off #if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // On ARM32, ARM64 and System V for struct returning // there is code that does GT_ASG-tree.CopyObj call. // CopyObj is a large node and the GT_ASG is small, which triggers an exception. 
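The gentree.cpp hunks below extend the GTNODE macro with an extra 'st' argument naming the struct that backs each operator, so a single master list expands into parallel tables: printable names, struct names, and true sizes (s_gtTrueSizes). A small self-contained illustration of that X-macro pattern, using an invented node list rather than gtlist.h:

#include <cstdio>

struct NodeOp    { int op1, op2; };
struct NodeConst { long value;   };

// One master list; each entry supplies enum name, printable name, and backing struct.
#define MY_NODES(X)              \
    X(ADD, "add",   NodeOp)      \
    X(MUL, "mul",   NodeOp)      \
    X(CNS, "const", NodeConst)

// Expand the same list three different ways.
enum Oper
{
#define X(en, sn, st) OP_##en,
    MY_NODES(X)
#undef X
    OP_COUNT
};

static const char* opNames[] = {
#define X(en, sn, st) sn,
    MY_NODES(X)
#undef X
};

static const unsigned opTrueSizes[] = {
#define X(en, sn, st) sizeof(st),
    MY_NODES(X)
#undef X
};

int main()
{
    for (int i = 0; i < OP_COUNT; i++)
    {
        printf("%-6s backed by %u bytes\n", opNames[i], opTrueSizes[i]);
    }
    return 0;
}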
- GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE; #endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE; @@ -282,30 +315,32 @@ void GenTree::InitNodeSize() #ifdef FEATURE_SIMD GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE; #endif // FEATURE_SIMD - GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE; #if USE_HELPERS_FOR_INT_DIV - GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE; - GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE; + GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE; #endif -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK + // TODO-Throughput: This should not need to be a large node. The object info should be + // obtained from the child node. 
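The block above pins particular operators to TREE_NODE_SZ_LARGE and then statically asserts that every GenTree flavor fits in the slot size its operators allocate, so a node can later be bashed to another operator without overflowing its allocation. A compact sketch of that two-bucket scheme, with sizes and struct names invented for the example:

#include <cstdio>
#include <cstring>
#include <new>

// Two fixed allocation sizes, mirroring the small/large node idea.
const size_t NODE_SZ_SMALL = 32;
const size_t NODE_SZ_LARGE = 96;

struct SmallNode { int oper; void* ops[2]; };
struct LargeNode { int oper; void* ops[2]; char extra[64]; };

// Every flavor must fit in the bucket its operators are assigned to.
static_assert(sizeof(SmallNode) <= NODE_SZ_SMALL, "SmallNode must fit a small slot");
static_assert(sizeof(LargeNode) <= NODE_SZ_LARGE, "LargeNode must fit a large slot");

// Per-operator slot sizes: operator 0 uses the small bucket, operator 1 the large one.
static unsigned char nodeSizes[2] = {NODE_SZ_SMALL, NODE_SZ_LARGE};

void* AllocNode(int oper)
{
    size_t sz = nodeSizes[oper];
    void*  p  = ::operator new(sz);
    memset(p, 0, sz);
    return p;
}

int main()
{
    void* n = AllocNode(1); // operator 1 gets a large slot
    printf("allocated %u bytes\n", (unsigned)nodeSizes[1]);
    ::operator delete(n);
    return 0;
}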
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; +#endif // FEATURE_PUT_STRUCT_ARG_STK assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -314,60 +349,65 @@ void GenTree::InitNodeSize() assert(sizeof(GenTreeLclFld) <= GenTree::s_gtNodeSizes[GT_LCL_FLD]); assert(sizeof(GenTreeLclVar) <= GenTree::s_gtNodeSizes[GT_LCL_VAR]); - static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeIntConCommon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL); #ifndef LEGACY_BACKEND - static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL); #endif // !LEGACY_BACKEND - static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL); - 
static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); - static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node -#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING - static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); -#else // FEATURE_UNIX_AMD64_STRUCT_PASSING - static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING + static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeJumpCC) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeFieldList) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node + static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL); + 
static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node +#ifndef FEATURE_PUT_STRUCT_ARG_STK + static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL); +#else // FEATURE_PUT_STRUCT_ARG_STK + // TODO-Throughput: This should not need to be a large node. The object info should be + // obtained from the child node. + static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); +#endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_SIMD - static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL); + static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL); #endif // FEATURE_SIMD + // clang-format on } size_t GenTree::GetNodeSize() const @@ -394,6 +434,88 @@ bool GenTree::IsNodeProperlySized() const } #endif +/***************************************************************************** + * + * When 'NODEBASH_STATS' is enabled in "jit.h" we record all instances of + * an existing GenTree node having its operator changed. This can be useful + * for two (related) things - to see what is being bashed (and what isn't), + * and to verify that the existing choices for what nodes are marked 'large' + * are reasonable (to minimize "wasted" space). + * + * And yes, the hash function / logic is simplistic, but it is conflict-free + * and transparent for what we need. + */ + +#if NODEBASH_STATS + +#define BASH_HASH_SIZE 211 + +inline hashme(genTreeOps op1, genTreeOps op2) +{ + return ((op1 * 104729) ^ (op2 * 56569)) % BASH_HASH_SIZE; +} + +struct BashHashDsc +{ + unsigned __int32 bhFullHash; // the hash value (unique for all old->new pairs) + unsigned __int32 bhCount; // the same old->new bashings seen so far + unsigned __int8 bhOperOld; // original gtOper + unsigned __int8 bhOperNew; // new gtOper +}; + +static BashHashDsc BashHash[BASH_HASH_SIZE]; + +void GenTree::RecordOperBashing(genTreeOps operOld, genTreeOps operNew) +{ + unsigned hash = hashme(operOld, operNew); + BashHashDsc* desc = BashHash + hash; + + if (desc->bhFullHash != hash) + { + noway_assert(desc->bhCount == 0); // if this ever fires, need fix the hash fn + desc->bhFullHash = hash; + } + + desc->bhCount += 1; + desc->bhOperOld = operOld; + desc->bhOperNew = operNew; +} + +void GenTree::ReportOperBashing(FILE* f) +{ + unsigned total = 0; + + fflush(f); + + fprintf(f, "\n"); + fprintf(f, "Bashed gtOper stats:\n"); + fprintf(f, "\n"); + fprintf(f, " Old operator New operator #bytes old->new Count\n"); + fprintf(f, " ---------------------------------------------------------------\n"); + + for (unsigned h = 0; h < BASH_HASH_SIZE; h++) + { + unsigned count = BashHash[h].bhCount; + if (count == 0) + continue; + + unsigned opOld = BashHash[h].bhOperOld; + unsigned opNew = BashHash[h].bhOperNew; + + fprintf(f, " GT_%-13s -> GT_%-13s [size: %3u->%3u] %c %7u\n", OpName((genTreeOps)opOld), + OpName((genTreeOps)opNew), s_gtTrueSizes[opOld], s_gtTrueSizes[opNew], + (s_gtTrueSizes[opOld] < s_gtTrueSizes[opNew]) ? 
'X' : ' ', count); + total += count; + } + fprintf(f, "\n"); + fprintf(f, "Total bashings: %u\n", total); + fprintf(f, "\n"); + + fflush(f); +} + +#endif // NODEBASH_STATS + #else // SMALL_TREE_NODES #ifdef DEBUG @@ -407,6 +529,71 @@ bool GenTree::IsNodeProperlySized() const /*****************************************************************************/ +#if MEASURE_NODE_SIZE + +void GenTree::DumpNodeSizes(FILE* fp) +{ +// Dump the sizes of the various GenTree flavors + +#if SMALL_TREE_NODES + fprintf(fp, "Small tree node size = %3u bytes\n", TREE_NODE_SZ_SMALL); +#endif + fprintf(fp, "Large tree node size = %3u bytes\n", TREE_NODE_SZ_LARGE); + fprintf(fp, "\n"); + +#if SMALL_TREE_NODES + + // Verify that node sizes are set kosherly and dump sizes + for (unsigned op = GT_NONE + 1; op < GT_COUNT; op++) + { + unsigned needSize = s_gtTrueSizes[op]; + unsigned nodeSize = s_gtNodeSizes[op]; + + const char* structNm = OpStructName((genTreeOps)op); + const char* operName = OpName((genTreeOps)op); + + bool repeated = false; + + // Have we seen this struct flavor before? + for (unsigned mop = GT_NONE + 1; mop < op; mop++) + { + if (strcmp(structNm, OpStructName((genTreeOps)mop)) == 0) + { + repeated = true; + break; + } + } + + // Don't repeat the same GenTree flavor unless we have an error + if (!repeated || needSize > nodeSize) + { + unsigned sizeChar = '?'; + + if (nodeSize == TREE_NODE_SZ_SMALL) + sizeChar = 'S'; + else if (nodeSize == TREE_NODE_SZ_LARGE) + sizeChar = 'L'; + + fprintf(fp, "GT_%-16s ... %-19s = %3u bytes (%c)", operName, structNm, needSize, sizeChar); + if (needSize > nodeSize) + { + fprintf(fp, " -- ERROR -- allocation is only %u bytes!", nodeSize); + } + else if (needSize <= TREE_NODE_SZ_SMALL && nodeSize == TREE_NODE_SZ_LARGE) + { + fprintf(fp, " ... could be small"); + } + + fprintf(fp, "\n"); + } + } + +#endif +} + +#endif // MEASURE_NODE_SIZE +/*****************************************************************************/ + // make sure these get instantiated, because it's not in a header file // (emulating the c++ 'export' keyword here) // VC appears to be somewhat unpredictable about whether they end up in the .obj file without this @@ -965,11 +1152,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData } break; - case GT_LIST: + case GT_FIELD_LIST: { - GenTreeArgList* list = tree->AsArgList(); - if (list->IsAggregate()) + GenTreeFieldList* list = tree->AsFieldList(); + if (list->IsFieldListHead()) { + GenTreeFieldList* list = tree->AsFieldList(); for (; list != nullptr; list = list->Rest()) { result = fgWalkTreePostRec<computeStack>(&list->gtOp1, fgWalkData); @@ -978,12 +1166,8 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData return result; } } - break; } - - // GT_LIST nodes that do not represent aggregate arguments intentionally fall through to the - // default node processing below. - __fallthrough; + break; } default: @@ -1765,6 +1949,66 @@ bool GenTreeCall::IsHelperCall(Compiler* compiler, unsigned helper) const return IsHelperCall(compiler->eeFindHelper(helper)); } +//------------------------------------------------------------------------ +// GenTreeCall::ReplaceCallOperand: +// Replaces a given operand to a call node and updates the call +// argument table if necessary. +// +// Arguments: +// useEdge - the use edge that points to the operand to be replaced. +// replacement - the replacement node. 
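The ReplaceCallOperand routine documented here works on a use edge, i.e. a pointer to the parent's child pointer: it writes the replacement through the edge and, when the operand is a call argument, repoints the argument-table entry that still referenced the old node. A toy version of that pattern, with a hypothetical two-operand node and a one-entry side table standing in for fgArgTabEntry:

#include <cassert>
#include <cstdio>

struct Node
{
    const char* name;
    Node*       op1;
    Node*       op2;
};

// Side table entry that caches a pointer to a particular operand node.
struct ArgEntry
{
    Node* node;
};

// Replace the operand that 'useEdge' points at and keep the side table consistent.
void ReplaceOperand(Node** useEdge, Node* replacement, ArgEntry* entry)
{
    assert(useEdge != nullptr && replacement != nullptr);
    Node* original = *useEdge;
    *useEdge       = replacement;

    if (entry->node == original)
    {
        entry->node = replacement; // repoint the cached reference
    }
}

int main()
{
    Node a     = {"a", nullptr, nullptr};
    Node b     = {"b", nullptr, nullptr};
    Node call  = {"call", &a, nullptr};
    ArgEntry entry = {&a};

    ReplaceOperand(&call.op1, &b, &entry);
    printf("op1=%s argEntry=%s\n", call.op1->name, entry.node->name);
    return 0;
}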
+// +void GenTreeCall::ReplaceCallOperand(GenTree** useEdge, GenTree* replacement) +{ + assert(useEdge != nullptr); + assert(replacement != nullptr); + assert(TryGetUse(*useEdge, &useEdge)); + + GenTree* originalOperand = *useEdge; + *useEdge = replacement; + + const bool isArgument = + (replacement != gtControlExpr) && + ((gtCallType != CT_INDIRECT) || ((replacement != gtCallCookie) && (replacement != gtCallAddr))); + + if (isArgument) + { + if ((originalOperand->gtFlags & GTF_LATE_ARG) != 0) + { + replacement->gtFlags |= GTF_LATE_ARG; + } + else + { + assert((replacement->gtFlags & GTF_LATE_ARG) == 0); + + fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(this, originalOperand); + assert(fp->node == originalOperand); + fp->node = replacement; + } + } +} + +//------------------------------------------------------------------------- +// AreArgsComplete: Determine if this GT_CALL node's arguments have been processed. +// +// Return Value: +// Returns true if fgMorphArgs has processed the arguments. +// +bool GenTreeCall::AreArgsComplete() const +{ + if (fgArgInfo == nullptr) + { + return false; + } + if (fgArgInfo->AreArgsComplete()) + { + assert((gtCallLateArgs != nullptr) || !fgArgInfo->HasRegArgs()); + return true; + } + assert(gtCallArgs == nullptr); + return false; +} + /***************************************************************************** * * Returns non-zero if the two trees are identical. @@ -2071,7 +2315,9 @@ AGAIN: #ifdef FEATURE_READYTORUN_COMPILER if (op1->gtCall.gtEntryPoint.addr != op2->gtCall.gtEntryPoint.addr) + { return false; + } #endif } else @@ -2560,8 +2806,8 @@ AGAIN: hash = genTreeHashAdd(hash, tree->gtAllocObj.gtNewHelper); break; case GT_OBJ: - hash = genTreeHashAdd(hash, static_cast<unsigned>( - reinterpret_cast<uintptr_t>(tree->gtObj.gtClass))); + hash = + genTreeHashAdd(hash, static_cast<unsigned>(reinterpret_cast<uintptr_t>(tree->gtObj.gtClass))); break; // For the ones below no extra argument matters for comparison. @@ -3196,6 +3442,11 @@ GenTreePtr Compiler::gtReverseCond(GenTree* tree) tree->gtFlags ^= GTF_RELOP_NAN_UN; } } + else if (tree->OperGet() == GT_JCC) + { + GenTreeJumpCC* jcc = tree->AsJumpCC(); + jcc->gtCondition = GenTree::ReverseRelop(jcc->gtCondition); + } else { tree = gtNewOperNode(GT_NOT, TYP_INT, tree); @@ -3257,77 +3508,136 @@ bool GenTree::gtIsValid64RsltMul() #endif // DEBUG -/***************************************************************************** - * - * Figure out the evaluation order for a list of values. - */ +//------------------------------------------------------------------------------ +// gtSetListOrder : Figure out the evaluation order for a list of values. +// +// +// Arguments: +// list - List to figure out the evaluation order for +// isListCallArgs - True iff the list is a list of call arguments +// callArgsInRegs - True iff the list is a list of call arguments and they are passed in registers +// +// Return Value: +// True if the operation can be a root of a bitwise rotation tree; false otherwise. 
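The rewritten gtSetListOrder below walks the GT_LIST chain iteratively: it first pushes every list node on an explicit stack, then pops them so costs and levels are computed from the tail back toward the head, avoiding deep recursion on long argument lists. A minimal sketch of that push-then-pop traversal over a singly linked list:

#include <cstdio>
#include <vector>

struct ListNode
{
    int       cost;  // cost of this element
    int       total; // filled in: cost of this element plus everything after it
    ListNode* rest;
};

// Compute suffix totals without recursing once per list element.
void SetListTotals(ListNode* list)
{
    std::vector<ListNode*> stack;
    for (ListNode* n = list; n != nullptr; n = n->rest)
    {
        stack.push_back(n);
    }

    int running = 0;
    while (!stack.empty())
    {
        ListNode* n = stack.back();
        stack.pop_back();
        running += n->cost; // the tail is popped first, so this accumulates back to front
        n->total = running;
    }
}

int main()
{
    ListNode c = {3, 0, nullptr};
    ListNode b = {2, 0, &c};
    ListNode a = {1, 0, &b};
    SetListTotals(&a);
    printf("totals: %d %d %d\n", a.total, b.total, c.total); // prints 6 5 3
    return 0;
}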
-unsigned Compiler::gtSetListOrder(GenTree* list, bool regs) +unsigned Compiler::gtSetListOrder(GenTree* list, bool isListCallArgs, bool callArgsInRegs) { - assert(list && list->IsList()); + assert((list != nullptr) && list->OperIsAnyList()); + assert(!callArgsInRegs || isListCallArgs); - unsigned level = 0; - unsigned ftreg = 0; - unsigned costSz = 0; - unsigned costEx = 0; + ArrayStack<GenTree*> listNodes(this); + do + { + listNodes.Push(list); + list = list->gtOp.gtOp2; + } while ((list != nullptr) && (list->OperIsAnyList())); + + unsigned nxtlvl = (list == nullptr) ? 0 : gtSetEvalOrder(list); + while (listNodes.Height() > 0) + { #if FEATURE_STACK_FP_X87 - /* Save the current FP stack level since an argument list - * will implicitly pop the FP stack when pushing the argument */ - unsigned FPlvlSave = codeGen->genGetFPstkLevel(); + /* Save the current FP stack level since an argument list + * will implicitly pop the FP stack when pushing the argument */ + unsigned FPlvlSave = codeGen->genGetFPstkLevel(); #endif // FEATURE_STACK_FP_X87 - GenTreePtr next = list->gtOp.gtOp2; + list = listNodes.Pop(); + assert(list && list->OperIsAnyList()); + GenTreePtr next = list->gtOp.gtOp2; - if (next) - { - unsigned nxtlvl = gtSetListOrder(next, regs); + unsigned level = 0; + unsigned ftreg = 0; - ftreg |= next->gtRsvdRegs; + // TODO: Do we have to compute costs differently for argument lists and + // all other lists? + // https://github.com/dotnet/coreclr/issues/7095 + unsigned costSz = (isListCallArgs || (next == nullptr)) ? 0 : 1; + unsigned costEx = (isListCallArgs || (next == nullptr)) ? 0 : 1; - if (level < nxtlvl) + if (next != nullptr) { - level = nxtlvl; + ftreg |= next->gtRsvdRegs; + if (isListCallArgs) + { + if (level < nxtlvl) + { + level = nxtlvl; + } + } + costEx += next->gtCostEx; + costSz += next->gtCostSz; } - costEx += next->gtCostEx; - costSz += next->gtCostSz; - } - GenTreePtr op1 = list->gtOp.gtOp1; - unsigned lvl = gtSetEvalOrder(op1); + GenTreePtr op1 = list->gtOp.gtOp1; + unsigned lvl = gtSetEvalOrder(op1); #if FEATURE_STACK_FP_X87 - /* restore the FP level */ - codeGen->genResetFPstkLevel(FPlvlSave); + // restore the FP level + codeGen->genResetFPstkLevel(FPlvlSave); #endif // FEATURE_STACK_FP_X87 - list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs); + list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs); - if (level < lvl) - { - level = lvl; - } + // Swap the level counts + if (list->gtFlags & GTF_REVERSE_OPS) + { + unsigned tmpl; - if (op1->gtCostEx != 0) - { - costEx += op1->gtCostEx; - costEx += regs ? 0 : IND_COST_EX; - } + tmpl = lvl; + lvl = nxtlvl; + nxtlvl = tmpl; + } - if (op1->gtCostSz != 0) - { - costSz += op1->gtCostSz; + // TODO: Do we have to compute levels differently for argument lists and + // all other lists? + // https://github.com/dotnet/coreclr/issues/7095 + if (isListCallArgs) + { + if (level < lvl) + { + level = lvl; + } + } + else + { + if (lvl < 1) + { + level = nxtlvl; + } + else if (lvl == nxtlvl) + { + level = lvl + 1; + } + else + { + level = lvl; + } + } + + if (op1->gtCostEx != 0) + { + costEx += op1->gtCostEx; + costEx += (callArgsInRegs || !isListCallArgs) ? 
0 : IND_COST_EX; + } + + if (op1->gtCostSz != 0) + { + costSz += op1->gtCostSz; #ifdef _TARGET_XARCH_ - if (regs) // push is smaller than mov to reg + if (callArgsInRegs) // push is smaller than mov to reg #endif - { - costSz += 1; + { + costSz += 1; + } } - } - list->SetCosts(costEx, costSz); + list->SetCosts(costEx, costSz); - return level; + nxtlvl = level; + } + + return nxtlvl; } /***************************************************************************** @@ -3363,17 +3673,8 @@ void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool con { GenTreePtr op1 = *op1WB; GenTreePtr op2 = *op2WB; - GenTreePtr op1EffectiveVal; - if (op1->gtOper == GT_COMMA) - { - op1EffectiveVal = op1->gtEffectiveVal(); - if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) && - (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI()))) - { - op1 = op1EffectiveVal; - } - } + op1 = op1->gtEffectiveVal(); // Now we look for op1's with non-overflow GT_ADDs [of constants] while ((op1->gtOper == GT_ADD) && (!op1->gtOverflow()) && (!constOnly || (op1->gtOp.gtOp2->IsCnsIntOrI()))) @@ -3398,20 +3699,12 @@ void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool con op2 = tmp; } - if (op1->gtOper == GT_COMMA) - { - op1EffectiveVal = op1->gtEffectiveVal(); - if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) && - (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI()))) - { - op1 = op1EffectiveVal; - } - } - if (!constOnly && ((op2 == adr) || (!op2->IsCnsIntOrI()))) { break; } + + op1 = op1->gtEffectiveVal(); } *op1WB = op1; @@ -3445,15 +3738,7 @@ GenTreePtr Compiler::gtWalkOpEffectiveVal(GenTreePtr op) { for (;;) { - if (op->gtOper == GT_COMMA) - { - GenTreePtr opEffectiveVal = op->gtEffectiveVal(); - if ((opEffectiveVal->gtOper == GT_ADD) && (!opEffectiveVal->gtOverflow()) && - (opEffectiveVal->gtOp.gtOp2->IsCnsIntOrI())) - { - op = opEffectiveVal; - } - } + op = op->gtEffectiveVal(); if ((op->gtOper != GT_ADD) || op->gtOverflow() || !op->gtOp.gtOp2->IsCnsIntOrI()) { @@ -3980,6 +4265,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; case GT_LIST: + case GT_FIELD_LIST: case GT_NOP: costEx = 0; costSz = 0; @@ -4671,6 +4957,14 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) goto DONE; + case GT_LIST: + case GT_FIELD_LIST: + { + const bool isListCallArgs = false; + const bool callArgsInRegs = false; + return gtSetListOrder(tree, isListCallArgs, callArgsInRegs); + } + default: break; } @@ -5025,6 +5319,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; case GT_LIST: + case GT_FIELD_LIST: break; case GT_SUB: @@ -5123,7 +5418,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) #if FEATURE_STACK_FP_X87 FPlvlSave = codeGen->genGetFPstkLevel(); #endif // FEATURE_STACK_FP_X87 - lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, false); + const bool isListCallArgs = true; + const bool callArgsInRegs = false; + lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, isListCallArgs, callArgsInRegs); if (level < lvl2) { level = lvl2; @@ -5145,7 +5442,9 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) #if FEATURE_STACK_FP_X87 FPlvlSave = codeGen->genGetFPstkLevel(); #endif // FEATURE_STACK_FP_X87 - lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, true); + const bool isListCallArgs = true; + const bool callArgsInRegs = true; + lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, isListCallArgs, callArgsInRegs); if (level < lvl2) { level = lvl2; @@ -5189,7 +5488,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz 
+= 2; } } - else if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0) + else if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) { costEx += 2; costSz += 6; @@ -5789,11 +6088,11 @@ bool GenTree::IsAddWithI32Const(GenTreePtr* addr, int* offset) // 'parent' must be non-null // // Notes: -// When FEATURE_MULTIREG_ARGS is defined we can get here with GT_LDOBJ tree. +// When FEATURE_MULTIREG_ARGS is defined we can get here with GT_OBJ tree. // This happens when we have a struct that is passed in multiple registers. // // Also note that when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined the GT_LDOBJ -// later gets converted to a GT_LIST with two GT_LCL_FLDs in Lower/LowerXArch. +// later gets converted to a GT_FIELD_LIST with two GT_LCL_FLDs in Lower/LowerXArch. // GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) @@ -5952,6 +6251,9 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent) bool GenTree::TryGetUse(GenTree* def, GenTree*** use) { + assert(def != nullptr); + assert(use != nullptr); + for (GenTree** useEdge : UseEdges()) { if (*useEdge == def) @@ -5965,6 +6267,32 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) } //------------------------------------------------------------------------ +// GenTree::ReplaceOperand: +// Replace a given operand to this node with a new operand. If the +// current node is a call node, this will also udpate the call +// argument table if necessary. +// +// Arguments: +// useEdge - the use edge that points to the operand to be replaced. +// replacement - the replacement node. +// +void GenTree::ReplaceOperand(GenTree** useEdge, GenTree* replacement) +{ + assert(useEdge != nullptr); + assert(replacement != nullptr); + assert(TryGetUse(*useEdge, &useEdge)); + + if (OperGet() == GT_CALL) + { + AsCall()->ReplaceCallOperand(useEdge, replacement); + } + else + { + *useEdge = replacement; + } +} + +//------------------------------------------------------------------------ // gtGetParent: Get the parent of this node, and optionally capture the // pointer to the child so that it can be modified. // @@ -6500,16 +6828,15 @@ GenTreeCall* Compiler::gtNewCallNode( #endif // LEGACY_BACKEND #ifdef FEATURE_READYTORUN_COMPILER - node->gtCall.gtEntryPoint.addr = nullptr; + node->gtEntryPoint.addr = nullptr; #endif #if defined(DEBUG) || defined(INLINE_DATA) // These get updated after call node is built. - node->gtCall.gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL; - node->gtCall.gtRawILOffset = BAD_IL_OFFSET; + node->gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL; + node->gtRawILOffset = BAD_IL_OFFSET; #endif -#ifdef DEBUGGING_SUPPORT // Spec: Managed Retval sequence points needs to be generated while generating debug info for debuggable code. 
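One recurring mechanical change in this hunk is `(opts.eeFlags & CORJIT_FLG_PREJIT)` becoming `opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)`: raw bit masks replaced by a small flag-set wrapper queried through named methods. A sketch of such a wrapper; the class and flag names are illustrative, not the actual corjit.h definitions:

#include <cstdio>

// Minimal flag-set wrapper in the spirit of JitFlags: flags are identified by
// bit index, stored in one 64-bit word, and only touched through named methods.
class FlagSet
{
public:
    enum Flag
    {
        FLAG_PREJIT     = 0,
        FLAG_DEBUG_CODE = 1,
        FLAG_MIN_OPT    = 2,
    };

    FlagSet() : m_bits(0) {}

    void Set(Flag f)         { m_bits |= (1ULL << f); }
    void Clear(Flag f)       { m_bits &= ~(1ULL << f); }
    bool IsSet(Flag f) const { return (m_bits & (1ULL << f)) != 0; }

private:
    unsigned long long m_bits;
};

int main()
{
    FlagSet flags;
    flags.Set(FlagSet::FLAG_PREJIT);
    printf("prejit=%d minopt=%d\n", flags.IsSet(FlagSet::FLAG_PREJIT), flags.IsSet(FlagSet::FLAG_MIN_OPT));
    return 0;
}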
// // Implementation note: if not generating MRV info genCallSite2ILOffsetMap will be NULL and @@ -6537,7 +6864,6 @@ GenTreeCall* Compiler::gtNewCallNode( assert(!genCallSite2ILOffsetMap->Lookup(node, &value)); genCallSite2ILOffsetMap->Set(node, ilOffset); } -#endif // Initialize gtOtherRegs node->ClearOtherRegs(); @@ -6545,6 +6871,22 @@ GenTreeCall* Compiler::gtNewCallNode( // Initialize spill flags of gtOtherRegs node->ClearOtherRegFlags(); +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + // Initialize the multi-reg long return info if necessary + if (varTypeIsLong(node)) + { + // The return type will remain as the incoming long type + node->gtReturnType = node->gtType; + + // Initialize Return type descriptor of call node + ReturnTypeDesc* retTypeDesc = node->GetReturnTypeDesc(); + retTypeDesc->InitializeLongReturnType(this); + + // must be a long returned in two registers + assert(retTypeDesc->GetReturnRegCount() == 2); + } +#endif // defined(_TARGET_X86_) && !defined(_LEGACY_BACKEND_) + return node; } @@ -6648,29 +6990,6 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2) return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2)); } -//------------------------------------------------------------------------ -// Compiler::gtNewAggregate: -// Creates a new aggregate argument node. These nodes are used to -// represent arguments that are composed of multiple values (e.g. -// the lclVars that represent the fields of a promoted struct). -// -// Note that aggregate arguments are currently represented by GT_LIST -// nodes that are marked with the GTF_LIST_AGGREGATE flag. This -// representation may be changed in the future to instead use its own -// node type (e.g. GT_AGGREGATE). -// -// Arguments: -// firstElement - The first element in the aggregate's list of values. -// -// Returns: -// The newly-created aggregate node. -GenTreeArgList* Compiler::gtNewAggregate(GenTree* firstElement) -{ - GenTreeArgList* agg = gtNewArgList(firstElement); - agg->gtFlags |= GTF_LIST_AGGREGATE; - return agg; -} - /***************************************************************************** * * Create a list out of the three values. @@ -6741,7 +7060,7 @@ fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node) #endif // PROTO_JIT else if (curArgTabEntry->parent != nullptr) { - assert(curArgTabEntry->parent->IsList()); + assert(curArgTabEntry->parent->OperIsList()); if (curArgTabEntry->parent->Current() == node) { return curArgTabEntry; @@ -6956,17 +7275,32 @@ GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size) { // By default we treat this as an opaque struct type with known size. 
var_types blkType = TYP_STRUCT; -#if FEATURE_SIMD if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) { GenTree* val = addr->gtGetOp1(); - if (varTypeIsSIMD(val) && (genTypeSize(val->TypeGet()) == size)) +#if FEATURE_SIMD + if (varTypeIsSIMD(val)) { - blkType = val->TypeGet(); - return addr->gtGetOp1(); + if (genTypeSize(val->TypeGet()) == size) + { + blkType = val->TypeGet(); + return addr->gtGetOp1(); + } } - } + else #endif // FEATURE_SIMD +#ifndef LEGACY_BACKEND + if (val->TypeGet() == TYP_STRUCT) + { + GenTreeLclVarCommon* lcl = addr->gtGetOp1()->AsLclVarCommon(); + LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); + if ((varDsc->TypeGet() == TYP_STRUCT) && (varDsc->lvExactSize == size)) + { + return addr->gtGetOp1(); + } + } +#endif // !LEGACY_BACKEND + } return new (this, GT_BLK) GenTreeBlk(GT_BLK, blkType, addr, size); } @@ -6979,7 +7313,10 @@ GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size) // if FEATURE_SIMD is enabled and the source has a SIMD type. // isVolatile - Is this marked as volatile memory? -GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr, GenTreePtr srcAddr, CORINFO_CLASS_HANDLE structHnd, bool isVolatile) +GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr, + GenTreePtr srcAddr, + CORINFO_CLASS_HANDLE structHnd, + bool isVolatile) { GenTreePtr lhs = gtNewStructVal(structHnd, dstAddr); GenTree* src = nullptr; @@ -7046,10 +7383,10 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) } #endif // _TARGET_64BIT_ - // Make the type used in the GT_IND node match for evaluation types. + // Make the type match for evaluation types. gtType = asgType; - // if we are using an GT_INITBLK on a GC type the value being assigned has to be zero (null). + // if we are initializing a GC type the value being assigned must be zero (null). assert(!varTypeIsGC(asgType) || (cns == 0)); } @@ -7057,7 +7394,7 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) } } -// +// //------------------------------------------------------------------------ // gtBlockOpInit: Initializes a BlkOp GenTree // @@ -7066,7 +7403,7 @@ void GenTreeIntCon::FixupInitBlkValue(var_types asgType) // dst - the target (destination) we want to either initialize or copy to. // src - the init value for InitBlk or the source struct for CpBlk/CpObj. // isVolatile - specifies whether this node is a volatile memory operation. -// +// // Assumptions: // 'result' is an assignment that is newly constructed. // If 'dst' is TYP_STRUCT, then it must be a block node or lclVar. @@ -7156,9 +7493,6 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr result->gtFlags |= dst->gtFlags & GTF_ALL_EFFECT; result->gtFlags |= result->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT; - // TODO-1stClassStructs: This should be done only if the destination is non-local. 
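The gtNewBlockVal change above adds a fold for struct-typed locals: when the address is the address of a local variable and the local's exact size matches the requested block size, the local itself is returned instead of wrapping it in a new GT_BLK node. A small sketch of that "use the variable directly when it covers the whole block" check, over a hypothetical mini-IR:

#include <cstdio>

struct Local
{
    const char* name;
    unsigned    exactSize; // size of the variable in bytes
};

struct Value
{
    bool   isAddrOfLocal;
    Local* local;
};

// Either reuse the local directly or report that a block wrapper is needed.
Local* TryFoldBlockToLocal(const Value& addr, unsigned blockSize)
{
    if (addr.isAddrOfLocal && (addr.local->exactSize == blockSize))
    {
        return addr.local; // the whole variable is covered: no block node required
    }
    return nullptr; // caller would create the block-typed indirection instead
}

int main()
{
    Local s       = {"myStruct", 16};
    Value addrOfS = {true, &s};

    Local* folded = TryFoldBlockToLocal(addrOfS, 16);
    printf("16-byte block: %s\n", folded ? folded->name : "needs GT_BLK");

    folded = TryFoldBlockToLocal(addrOfS, 8);
    printf(" 8-byte block: %s\n", folded ? folded->name : "needs GT_BLK");
    return 0;
}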
- result->gtFlags |= (GTF_GLOB_REF | GTF_ASG); - // REVERSE_OPS is necessary because the use must occur before the def result->gtFlags |= GTF_REVERSE_OPS; @@ -7229,12 +7563,20 @@ GenTree* Compiler::gtNewBlkOpNode( srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1(); } } - - GenTree* result = gtNewAssignNode(dst, srcOrFillVal); - if (!isCopyBlock) + else { - result->gtFlags |= GTF_BLK_INIT; + // InitBlk + assert(varTypeIsIntegral(srcOrFillVal)); + if (varTypeIsStruct(dst)) + { + if (!srcOrFillVal->IsIntegralConst(0)) + { + srcOrFillVal = gtNewOperNode(GT_INIT_VAL, TYP_INT, srcOrFillVal); + } + } } + + GenTree* result = gtNewAssignNode(dst, srcOrFillVal); gtBlockOpInit(result, dst, srcOrFillVal, isVolatile); return result; } @@ -7376,17 +7718,30 @@ GenTreePtr Compiler::gtClone(GenTree* tree, bool complexOK) return copy; } -/***************************************************************************** - * - * Clones the given tree value and returns a copy of the given tree. Any - * references to local variable varNum will be replaced with the integer - * constant varVal. - */ +//------------------------------------------------------------------------ +// gtCloneExpr: Create a copy of `tree`, adding flags `addFlags`, mapping +// local `varNum` to int constant `varVal` if it appears at +// the root, and mapping uses of local `deepVarNum` to constant +// `deepVarVal` if they occur beyond the root. +// +// Arguments: +// tree - GenTree to create a copy of +// addFlags - GTF_* flags to add to the copied tree nodes +// varNum - lclNum to replace at the root, or ~0 for no root replacement +// varVal - If replacing at root, replace local `varNum` with IntCns `varVal` +// deepVarNum - lclNum to replace uses of beyond the root, or ~0 for no replacement +// deepVarVal - If replacing beyond root, replace `deepVarNum` with IntCns `deepVarVal` +// +// Return Value: +// A copy of the given tree with the replacements and added flags specified. +// +// Notes: +// Top-level callers should generally call the overload that doesn't have +// the explicit `deepVarNum` and `deepVarVal` parameters; those are used in +// recursive invocations to avoid replacing defs. -GenTreePtr Compiler::gtCloneExpr(GenTree* tree, - unsigned addFlags, - unsigned varNum, // = (unsigned)-1 - int varVal) +GenTreePtr Compiler::gtCloneExpr( + GenTree* tree, unsigned addFlags, unsigned varNum, int varVal, unsigned deepVarNum, int deepVarVal) { if (tree == nullptr) { @@ -7442,6 +7797,10 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, if (tree->gtLclVarCommon.gtLclNum == varNum) { copy = gtNewIconNode(varVal, tree->gtType); + if (tree->gtFlags & GTF_VAR_ARR_INDEX) + { + copy->LabelIndex(this); + } } else { @@ -7572,16 +7931,16 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, // The nodes below this are not bashed, so they can be allocated at their individual sizes. case GT_LIST: - // This is ridiculous, but would go away if we made a stronger distinction between argument lists, whose - // second argument *must* be an arglist*, and the uses of LIST in copyblk and initblk. 
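The new gtCloneExpr contract documented above distinguishes replacing a local at the root of the cloned tree (varNum/varVal) from replacing its uses deeper in the tree (deepVarNum/deepVarVal), and the GT_ASG case below deliberately stops substituting on the left-hand side so definitions are preserved. A toy clone-with-substitution over a tiny expression tree showing that def/use distinction (the node kinds are invented for the example):

#include <cstdio>

enum Kind { VAR, CNS, ADD, ASG };

struct Expr
{
    Kind  kind;
    int   value; // variable number for VAR, constant value for CNS
    Expr* op1;
    Expr* op2;
};

Expr* NewExpr(Kind k, int v, Expr* a = nullptr, Expr* b = nullptr)
{
    return new Expr{k, v, a, b};
}

// Clone 'e', replacing *uses* of variable 'varNum' with the constant 'varVal'.
// The destination of an assignment is cloned without substitution, so the
// definition of the variable survives (mirroring the GT_ASG special case).
Expr* CloneWithSubst(const Expr* e, int varNum, int varVal, bool substitute = true)
{
    if (e == nullptr)
    {
        return nullptr;
    }
    if (e->kind == VAR && substitute && e->value == varNum)
    {
        return NewExpr(CNS, varVal);
    }
    bool substLhs = substitute && (e->kind != ASG); // don't rewrite the def
    return NewExpr(e->kind, e->value, CloneWithSubst(e->op1, varNum, varVal, substLhs),
                   CloneWithSubst(e->op2, varNum, varVal, substitute));
}

void Dump(const Expr* e)
{
    switch (e->kind)
    {
        case VAR: printf("V%d", e->value); break;
        case CNS: printf("%d", e->value); break;
        case ADD: Dump(e->op1); printf(" + "); Dump(e->op2); break;
        case ASG: Dump(e->op1); printf(" = "); Dump(e->op2); break;
    }
}

int main()
{
    // V0 = V0 + V1
    Expr* tree  = NewExpr(ASG, 0, NewExpr(VAR, 0), NewExpr(ADD, 0, NewExpr(VAR, 0), NewExpr(VAR, 1)));
    Expr* clone = CloneWithSubst(tree, 0, 7); // replace uses of V0 with 7
    Dump(clone);                              // prints: V0 = 7 + V1
    printf("\n");
    return 0;
}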
- if (tree->gtOp.gtOp2 != nullptr && tree->gtOp.gtOp2->OperGet() == GT_LIST) - { - copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1, tree->gtOp.gtOp2->AsArgList()); - } - else - { - copy = new (this, GT_LIST) GenTreeOp(GT_LIST, TYP_VOID, tree->gtOp.gtOp1, tree->gtOp.gtOp2); - } + assert((tree->gtOp.gtOp2 == nullptr) || tree->gtOp.gtOp2->OperIsList()); + copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1); + copy->gtOp.gtOp2 = tree->gtOp.gtOp2; + break; + + case GT_FIELD_LIST: + copy = new (this, GT_FIELD_LIST) GenTreeFieldList(tree->gtOp.gtOp1, tree->AsFieldList()->gtFieldOffset, + tree->AsFieldList()->gtFieldType, nullptr); + copy->gtOp.gtOp2 = tree->gtOp.gtOp2; + copy->gtFlags = (copy->gtFlags & ~GTF_FIELD_LIST_HEAD) | (tree->gtFlags & GTF_FIELD_LIST_HEAD); break; case GT_INDEX: @@ -7608,8 +7967,9 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, case GT_ARR_INDEX: copy = new (this, GT_ARR_INDEX) - GenTreeArrIndex(tree->TypeGet(), gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, varNum, varVal), - gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, varNum, varVal), + GenTreeArrIndex(tree->TypeGet(), + gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, deepVarNum, deepVarVal), tree->gtArrIndex.gtCurrDim, tree->gtArrIndex.gtArrRank, tree->gtArrIndex.gtArrElemType); break; @@ -7708,12 +8068,20 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, if (tree->gtOp.gtOp1) { - copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, varNum, varVal); + if (tree->gtOper == GT_ASG) + { + // Don't replace varNum if it appears as the LHS of an assign. + copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, -1, 0, deepVarNum, deepVarVal); + } + else + { + copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, deepVarNum, deepVarVal); + } } if (tree->gtGetOp2()) { - copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, varNum, varVal); + copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, deepVarNum, deepVarVal); } /* Flags */ @@ -7775,18 +8143,6 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, copy->CopyReg(tree); } - // We can call gtCloneExpr() before we have called fgMorph when we expand a GT_INDEX node in fgMorphArrayIndex() - // The method gtFoldExpr() expects to be run after fgMorph so it will set the GTF_DEBUG_NODE_MORPHED - // flag on nodes that it adds/modifies. Then when we call fgMorph we will assert. - // We really only will need to fold when this method is used to replace references to - // local variable with an integer. - // - if (varNum != (unsigned)-1) - { - /* Try to do some folding */ - copy = gtFoldExpr(copy); - } - goto DONE; } @@ -7795,7 +8151,7 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, switch (oper) { case GT_STMT: - copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, varNum, varVal); + copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, deepVarNum, deepVarVal); copy = gtNewStmt(copy, tree->gtStmt.gtStmtILoffsx); goto DONE; @@ -7803,15 +8159,17 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, copy = new (this, GT_CALL) GenTreeCall(tree->TypeGet()); - copy->gtCall.gtCallObjp = - tree->gtCall.gtCallObjp ? gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, varNum, varVal) : nullptr; - copy->gtCall.gtCallArgs = tree->gtCall.gtCallArgs - ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, varNum, varVal)->AsArgList() + copy->gtCall.gtCallObjp = tree->gtCall.gtCallObjp + ? 
gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, deepVarNum, deepVarVal) : nullptr; + copy->gtCall.gtCallArgs = + tree->gtCall.gtCallArgs + ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, deepVarNum, deepVarVal)->AsArgList() + : nullptr; copy->gtCall.gtCallMoreFlags = tree->gtCall.gtCallMoreFlags; copy->gtCall.gtCallLateArgs = tree->gtCall.gtCallLateArgs - ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, varNum, varVal)->AsArgList() + ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, deepVarNum, deepVarVal)->AsArgList() : nullptr; #if !FEATURE_FIXED_OUT_ARGS @@ -7832,11 +8190,12 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, /* Copy the union */ if (tree->gtCall.gtCallType == CT_INDIRECT) { - copy->gtCall.gtCallCookie = tree->gtCall.gtCallCookie - ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, varNum, varVal) - : nullptr; - copy->gtCall.gtCallAddr = - tree->gtCall.gtCallAddr ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, varNum, varVal) : nullptr; + copy->gtCall.gtCallCookie = + tree->gtCall.gtCallCookie ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, deepVarNum, deepVarVal) + : nullptr; + copy->gtCall.gtCallAddr = tree->gtCall.gtCallAddr + ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, deepVarNum, deepVarVal) + : nullptr; } else if (tree->gtFlags & GTF_CALL_VIRT_STUB) { @@ -7883,8 +8242,9 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, nullptr, tree->gtField.gtFldOffset); - copy->gtField.gtFldObj = - tree->gtField.gtFldObj ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, varNum, varVal) : nullptr; + copy->gtField.gtFldObj = tree->gtField.gtFldObj + ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, deepVarNum, deepVarVal) + : nullptr; copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap; #ifdef FEATURE_READYTORUN_COMPILER copy->gtField.gtFieldLookup = tree->gtField.gtFieldLookup; @@ -7897,10 +8257,10 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, GenTreePtr inds[GT_ARR_MAX_RANK]; for (unsigned dim = 0; dim < tree->gtArrElem.gtArrRank; dim++) { - inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, varNum, varVal); + inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, deepVarNum, deepVarVal); } copy = new (this, GT_ARR_ELEM) - GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, varNum, varVal), + GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, deepVarNum, deepVarVal), tree->gtArrElem.gtArrRank, tree->gtArrElem.gtArrElemSize, tree->gtArrElem.gtArrElemType, &inds[0]); } @@ -7909,34 +8269,37 @@ GenTreePtr Compiler::gtCloneExpr(GenTree* tree, case GT_ARR_OFFSET: { copy = new (this, GT_ARR_OFFSET) - GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, varNum, varVal), - gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, varNum, varVal), - gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, varNum, varVal), + GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, deepVarNum, deepVarVal), tree->gtArrOffs.gtCurrDim, tree->gtArrOffs.gtArrRank, tree->gtArrOffs.gtArrElemType); } break; case GT_CMPXCHG: copy = new (this, GT_CMPXCHG) - GenTreeCmpXchg(tree->TypeGet(), gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, varNum, varVal), - gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, varNum, varVal), - 
gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, varNum, varVal)); + GenTreeCmpXchg(tree->TypeGet(), + gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, deepVarNum, deepVarVal)); break; case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD - copy = new (this, oper) GenTreeBoundsChk(oper, tree->TypeGet(), - gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, varNum, varVal), - gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, varNum, varVal), - tree->gtBoundsChk.gtThrowKind); + copy = new (this, oper) + GenTreeBoundsChk(oper, tree->TypeGet(), + gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, deepVarNum, deepVarVal), + tree->gtBoundsChk.gtThrowKind); break; case GT_STORE_DYN_BLK: case GT_DYN_BLK: - copy = new (this, oper) GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, varNum, varVal), - gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, varNum, varVal)); + copy = new (this, oper) + GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, deepVarNum, deepVarVal), + gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, deepVarNum, deepVarVal)); break; default: @@ -8050,12 +8413,31 @@ GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr { assert(treeParent != nullptr); + // Check to see if the node to be replaced is a call argument and if so, + // set `treeParent` to the call node. + GenTree* cursor = treeParent; + while ((cursor != nullptr) && (cursor->OperGet() == GT_LIST)) + { + cursor = cursor->gtNext; + } + + if ((cursor != nullptr) && (cursor->OperGet() == GT_CALL)) + { + treeParent = cursor; + } + +#ifdef DEBUG + GenTree** useEdge; + assert(treeParent->TryGetUse(tree, &useEdge)); + assert(useEdge == treePtr); +#endif // DEBUG + GenTreePtr treeFirstNode = fgGetFirstNode(tree); GenTreePtr treeLastNode = tree; GenTreePtr treePrevNode = treeFirstNode->gtPrev; GenTreePtr treeNextNode = treeLastNode->gtNext; - *treePtr = replacementTree; + treeParent->ReplaceOperand(treePtr, replacementTree); // Build the linear order for "replacementTree". fgSetTreeSeq(replacementTree, treePrevNode); @@ -8082,48 +8464,6 @@ GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr treeNextNode->gtPrev = treeLastNode; } - bool needFixupCallArg = false; - GenTreePtr node = treeParent; - - // If we have replaced an arg, then update pointers in argtable. - do - { - // Look for the first enclosing callsite - switch (node->OperGet()) - { - case GT_LIST: - case GT_ARGPLACE: - // "tree" is likely an argument of a call. - needFixupCallArg = true; - break; - - case GT_CALL: - if (needFixupCallArg) - { - // We have replaced an arg, so update pointers in argtable. - fgFixupArgTabEntryPtr(node, tree, replacementTree); - needFixupCallArg = false; - } - break; - - default: - // "tree" is unlikely an argument of a call. - needFixupCallArg = false; - break; - } - - if (needFixupCallArg) - { - // Keep tracking to update the first enclosing call. - node = node->gtGetParent(nullptr); - } - else - { - // Stop tracking. - node = nullptr; - } - } while (node != nullptr); - // Propagate side-effect flags of "replacementTree" to its parents if needed. 
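After splicing in the replacement, gtReplaceTree still has to keep ancestor flags honest: a parent's side-effect summary is the union of its children's, so effects introduced by the new subtree must be propagated upward. A small sketch of OR-ing a child's effect bits up a parent chain; the flag values are hypothetical, in the spirit of GTF_ASG / GTF_CALL / GTF_EXCEPT:

#include <cstdio>

const unsigned EFFECT_ASG    = 0x1;
const unsigned EFFECT_CALL   = 0x2;
const unsigned EFFECT_EXCEPT = 0x4;

struct Node
{
    const char* name;
    unsigned    flags;
    Node*       parent;
};

// Propagate any effect bits the new child introduced to every ancestor.
void PropagateEffectsUp(Node* child)
{
    unsigned effects = child->flags & (EFFECT_ASG | EFFECT_CALL | EFFECT_EXCEPT);
    for (Node* p = child->parent; p != nullptr; p = p->parent)
    {
        p->flags |= effects;
    }
}

int main()
{
    Node stmt = {"stmt", 0, nullptr};
    Node add  = {"add", 0, &stmt};
    Node call = {"call", EFFECT_CALL | EFFECT_EXCEPT, &add};

    PropagateEffectsUp(&call); // the freshly inserted call makes its ancestors effectful
    printf("add flags=0x%x stmt flags=0x%x\n", add.flags, stmt.flags);
    return 0;
}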
gtUpdateSideEffects(treeParent, tree->gtFlags, replacementTree->gtFlags); } @@ -8304,14 +8644,13 @@ bool GenTree::gtSetFlags() const // // Precondition we have a GTK_SMPOP // - assert(OperIsSimple()); - if (!varTypeIsIntegralOrI(TypeGet())) { return false; } #if FEATURE_SET_FLAGS + assert(OperIsSimple()); if ((gtFlags & GTF_SET_FLAGS) && gtOper != GT_IND) { @@ -8325,6 +8664,7 @@ bool GenTree::gtSetFlags() const #else // !FEATURE_SET_FLAGS +#ifdef LEGACY_BACKEND #ifdef _TARGET_XARCH_ // Return true if/when the codegen for this node will set the flags // @@ -8346,6 +8686,22 @@ bool GenTree::gtSetFlags() const return false; #endif +#else // !LEGACY_BACKEND +#ifdef _TARGET_XARCH_ + if (((gtFlags & GTF_SET_FLAGS) != 0) && (gtOper != GT_IND)) + { + // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND + return true; + } + else + { + return false; + } +#else + unreached(); +#endif +#endif // !LEGACY_BACKEND + #endif // !FEATURE_SET_FLAGS } @@ -8399,7 +8755,8 @@ bool GenTree::gtRequestSetFlags() /*****************************************************************************/ void GenTree::CopyTo(class Compiler* comp, const GenTree& gt) { - gtOper = gt.gtOper; + SetOperRaw(gt.OperGet()); + gtType = gt.gtType; gtAssertionNum = gt.gtAssertionNum; @@ -8772,19 +9129,12 @@ GenTreePtr GenTree::GetChild(unsigned childNum) } } -GenTreeUseEdgeIterator::GenTreeUseEdgeIterator() - : m_node(nullptr) - , m_edge(nullptr) - , m_argList(nullptr) - , m_state(-1) +GenTreeUseEdgeIterator::GenTreeUseEdgeIterator() : m_node(nullptr), m_edge(nullptr), m_argList(nullptr), m_state(-1) { } GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) - : m_node(node) - , m_edge(nullptr) - , m_argList(nullptr) - , m_state(0) + : m_node(node), m_edge(nullptr), m_argList(nullptr), m_state(0) { assert(m_node != nullptr); @@ -8894,30 +9244,53 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const } case GT_DYN_BLK: + { + GenTreeDynBlk* const dynBlock = m_node->AsDynBlk(); switch (m_state) { case 0: - return &(m_node->AsDynBlk()->gtOp1); + return dynBlock->gtEvalSizeFirst ? &dynBlock->gtDynamicSize : &dynBlock->gtOp1; case 1: - return &(m_node->AsDynBlk()->gtDynamicSize); + return dynBlock->gtEvalSizeFirst ? &dynBlock->gtOp1 : &dynBlock->gtDynamicSize; default: return nullptr; } - break; + } + break; case GT_STORE_DYN_BLK: - switch (m_state) + { + GenTreeDynBlk* const dynBlock = m_node->AsDynBlk(); + if (dynBlock->gtEvalSizeFirst) { - case 0: - return &(m_node->AsDynBlk()->gtOp1); - case 1: - return &(m_node->AsDynBlk()->gtOp2); - case 2: - return &(m_node->AsDynBlk()->gtDynamicSize); - default: - return nullptr; + switch (m_state) + { + case 0: + return &dynBlock->gtDynamicSize; + case 1: + return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1; + case 2: + return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2; + default: + return nullptr; + } } - break; + else + { + switch (m_state) + { + case 0: + return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1; + case 1: + return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2; + case 2: + return &dynBlock->gtDynamicSize; + default: + return nullptr; + } + } + } + break; case GT_LEA: { @@ -8942,13 +9315,9 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const } break; - case GT_LIST: - if (m_node->AsArgList()->IsAggregate()) - { - // List nodes that represent aggregates are handled by MoveNextAggregateUseEdge. 
- break; - } - __fallthrough; + case GT_FIELD_LIST: + // Field List nodes are handled by MoveToNextFieldUseEdge. + break; default: if (m_node->OperIsConst() || m_node->OperIsLeaf()) @@ -8988,13 +9357,13 @@ void GenTreeUseEdgeIterator::MoveToNextCallUseEdge() { enum { - CALL_INSTANCE = 0, - CALL_ARGS = 1, - CALL_LATE_ARGS = 2, + CALL_INSTANCE = 0, + CALL_ARGS = 1, + CALL_LATE_ARGS = 2, CALL_CONTROL_EXPR = 3, - CALL_COOKIE = 4, - CALL_ADDRESS = 5, - CALL_TERMINAL = 6, + CALL_COOKIE = 4, + CALL_ADDRESS = 5, + CALL_TERMINAL = 6, }; GenTreeCall* call = m_node->AsCall(); @@ -9197,10 +9566,9 @@ void GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge() } #endif // FEATURE_SIMD -void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge() +void GenTreeUseEdgeIterator::MoveToNextFieldUseEdge() { - assert(m_node->OperGet() == GT_LIST); - assert(m_node->AsArgList()->IsAggregate()); + assert(m_node->OperGet() == GT_FIELD_LIST); for (;;) { @@ -9218,9 +9586,9 @@ void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge() } else { - GenTreeArgList* aggNode = m_argList->AsArgList(); - m_edge = &aggNode->gtOp1; - m_argList = aggNode->Rest(); + GenTreeArgList* listNode = m_argList->AsArgList(); + m_edge = &listNode->gtOp1; + m_argList = listNode->Rest(); return; } break; @@ -9266,9 +9634,9 @@ GenTreeUseEdgeIterator& GenTreeUseEdgeIterator::operator++() MoveToNextSIMDUseEdge(); } #endif - else if ((op == GT_LIST) && (m_node->AsArgList()->IsAggregate())) + else if (op == GT_FIELD_LIST) { - MoveToNextAggregateUseEdge(); + MoveToNextFieldUseEdge(); } else { @@ -9529,7 +9897,7 @@ void Compiler::gtDispNodeName(GenTree* tree) { sprintf_s(bufp, sizeof(buf), " %s_ovfl%c", name, 0); } - else if (tree->OperIsBlk() && (tree->AsBlk()->gtBlkSize != 0)) + else if (tree->OperIsBlk() && !tree->OperIsDynBlk()) { sprintf_s(bufp, sizeof(buf), " %s(%d)", name, tree->AsBlk()->gtBlkSize); } @@ -9775,6 +10143,9 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z goto DASH; case GT_MUL: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif if (tree->gtFlags & GTF_MUL_64RSLT) { printf("L"); @@ -10409,6 +10780,13 @@ void Compiler::gtDispConst(GenTree* tree) printf(" field offset"); } +#ifdef FEATURE_SIMD + if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0) + { + printf(" Vector<T>.Count"); + } +#endif + if ((tree->IsReuseRegVal()) != 0) { printf(" reuse reg val"); @@ -10714,6 +11092,10 @@ void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack) } break; + case GT_JCC: + printf(" cond=%s", GenTree::NodeName(tree->AsJumpCC()->gtCondition)); + break; + default: assert(!"don't know how to display tree leaf node"); } @@ -10928,14 +11310,62 @@ void Compiler::gtDispTree(GenTreePtr tree, { printf(" (last use)"); } - if (tree->OperIsCopyBlkOp()) + if (tree->OperIsBlkOp()) + { + if (tree->OperIsCopyBlkOp()) + { + printf(" (copy)"); + } + else if (tree->OperIsInitBlkOp()) + { + printf(" (init)"); + } + if (tree->OperIsStoreBlk() && (tree->AsBlk()->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid)) + { + switch (tree->AsBlk()->gtBlkOpKind) + { + case GenTreeBlk::BlkOpKindRepInstr: + printf(" (RepInstr)"); + break; + case GenTreeBlk::BlkOpKindUnroll: + printf(" (Unroll)"); + break; + case GenTreeBlk::BlkOpKindHelper: + printf(" (Helper)"); + break; + default: + unreached(); + } + } + } + else if (tree->OperIsFieldList()) { - printf(" (copy)"); + printf(" %s at offset %d", varTypeName(tree->AsFieldList()->gtFieldType), + tree->AsFieldList()->gtFieldOffset); } - else if (tree->OperIsInitBlkOp()) 
+#if FEATURE_PUT_STRUCT_ARG_STK + else if ((tree->OperGet() == GT_PUTARG_STK) && + (tree->AsPutArgStk()->gtPutArgStkKind != GenTreePutArgStk::Kind::Invalid)) { - printf(" (init)"); + switch (tree->AsPutArgStk()->gtPutArgStkKind) + { + case GenTreePutArgStk::Kind::RepInstr: + printf(" (RepInstr)"); + break; + case GenTreePutArgStk::Kind::Unroll: + printf(" (Unroll)"); + break; + case GenTreePutArgStk::Kind::Push: + printf(" (Push)"); + break; + case GenTreePutArgStk::Kind::PushAllSlots: + printf(" (PushAllSlots)"); + break; + default: + unreached(); + } } +#endif // FEATURE_PUT_STRUCT_ARG_STK IndirectAssignmentAnnotation* pIndirAnnote; if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote)) @@ -11282,7 +11712,7 @@ void Compiler::gtDispTree(GenTreePtr tree, // call - The call for which 'arg' is an argument // arg - The argument for which a message should be constructed // argNum - The ordinal number of the arg in the argument list -// listCount - When printing in LIR form this is the count for a multireg GT_LIST +// listCount - When printing in LIR form this is the count for a GT_FIELD_LIST // or -1 if we are not printing in LIR form // bufp - A pointer to the buffer into which the message is written // bufLength - The length of the buffer pointed to by bufp @@ -11338,7 +11768,7 @@ void Compiler::gtGetArgMsg( // call - The call for which 'arg' is an argument // argx - The argument for which a message should be constructed // lateArgIndex - The ordinal number of the arg in the lastArg list -// listCount - When printing in LIR form this is the count for a multireg GT_LIST +// listCount - When printing in LIR form this is the count for a multireg GT_FIELD_LIST // or -1 if we are not printing in LIR form // bufp - A pointer to the buffer into which the message is written // bufLength - The length of the buffer pointed to by bufp @@ -11542,22 +11972,8 @@ void Compiler::gtDispLIRNode(GenTree* node) const bool nodeIsCall = node->IsCall(); - int numCallEarlyArgs = 0; - if (nodeIsCall) - { - GenTreeCall* call = node->AsCall(); - for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest()) - { - if (!args->Current()->IsArgPlaceHolderNode() && args->Current()->IsValue()) - { - numCallEarlyArgs++; - } - } - } - // Visit operands - IndentInfo operandArc = IIArcTop; - int callArgNumber = 0; + IndentInfo operandArc = IIArcTop; for (GenTree* operand : node->Operands()) { if (operand->IsArgPlaceHolderNode() || !operand->IsValue()) @@ -11588,20 +12004,22 @@ void Compiler::gtDispLIRNode(GenTree* node) } else { - int callLateArgNumber = callArgNumber - numCallEarlyArgs; + fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(call, operand); + assert(curArgTabEntry); + if (operand->OperGet() == GT_LIST) { int listIndex = 0; for (GenTreeArgList* element = operand->AsArgList(); element != nullptr; element = element->Rest()) { operand = element->Current(); - if (callLateArgNumber < 0) + if (curArgTabEntry->lateArgInx == (unsigned)-1) { - gtGetArgMsg(call, operand, callArgNumber, listIndex, buf, sizeof(buf)); + gtGetArgMsg(call, operand, curArgTabEntry->argNum, listIndex, buf, sizeof(buf)); } else { - gtGetLateArgMsg(call, operand, callLateArgNumber, listIndex, buf, sizeof(buf)); + gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, listIndex, buf, sizeof(buf)); } displayOperand(operand, buf, operandArc, indentStack); @@ -11610,19 +12028,17 @@ void Compiler::gtDispLIRNode(GenTree* node) } else { - if (callLateArgNumber < 0) + if (curArgTabEntry->lateArgInx == 
(unsigned)-1) { - gtGetArgMsg(call, operand, callArgNumber, -1, buf, sizeof(buf)); + gtGetArgMsg(call, operand, curArgTabEntry->argNum, -1, buf, sizeof(buf)); } else { - gtGetLateArgMsg(call, operand, callLateArgNumber, -1, buf, sizeof(buf)); + gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, -1, buf, sizeof(buf)); } displayOperand(operand, buf, operandArc, indentStack); } - - callArgNumber++; } } else if (node->OperIsDynBlkOp()) @@ -12315,9 +12731,6 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree) case TYP_ULONG: if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0) { - op1->ChangeOperConst(GT_CNS_NATIVELONG); // need type of oper to be same as tree - op1->gtType = TYP_LONG; - // We don't care about the value as we are throwing an exception goto LNG_OVF; } lval1 = UINT64(UINT32(i1)); @@ -12516,47 +12929,19 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree) // constants in a target-specific function. CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef _TARGET_XARCH_ - // Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper - // doesn't match with the C compiler's cast result. + // Don't fold conversions of +inf/-inf to integral value on all platforms + // as the value returned by JIT helper doesn't match with the C compiler's cast result. + // We want the behavior to be same with or without folding. return tree; -#else //!_TARGET_XARCH_ + } - switch (tree->CastToType()) - { - case TYP_BYTE: - i1 = ssize_t(INT8(d1)); - goto CNS_INT; - case TYP_UBYTE: - i1 = ssize_t(UINT8(d1)); - goto CNS_INT; - case TYP_SHORT: - i1 = ssize_t(INT16(d1)); - goto CNS_INT; - case TYP_CHAR: - i1 = ssize_t(UINT16(d1)); - goto CNS_INT; - case TYP_INT: - i1 = ssize_t(INT32(d1)); - goto CNS_INT; - case TYP_UINT: - i1 = ssize_t(UINT32(d1)); - goto CNS_INT; - case TYP_LONG: - lval1 = INT64(d1); - goto CNS_LONG; - case TYP_ULONG: - lval1 = UINT64(d1); - goto CNS_LONG; - case TYP_FLOAT: - case TYP_DOUBLE: - if (op1->gtType == TYP_FLOAT) - d1 = forceCastToFloat(d1); // it's only !_finite() after this conversion - goto CNS_DOUBLE; - default: - unreached(); - } -#endif //!_TARGET_XARCH_ + if (d1 <= -1.0 && varTypeIsUnsigned(tree->CastToType())) + { + // Don't fold conversions of these cases becasue the result is unspecified per ECMA spec + // and the native math doing the fold doesn't match the run-time computation on all + // platforms. + // We want the behavior to be same with or without folding. 
+ return tree; } switch (tree->CastToType()) @@ -12633,7 +13018,7 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree) return op2; } - if (tree->gtOper == GT_LIST) + if (tree->OperIsAnyList()) { return tree; } @@ -13621,8 +14006,8 @@ GenTreePtr Compiler::gtNewTempAssign(unsigned tmp, GenTreePtr val) var_types valTyp = val->TypeGet(); if (val->OperGet() == GT_LCL_VAR && lvaTable[val->gtLclVar.gtLclNum].lvNormalizeOnLoad()) { - valTyp = lvaGetRealType(val->gtLclVar.gtLclNum); - val = gtNewLclvNode(val->gtLclVar.gtLclNum, valTyp, val->gtLclVar.gtLclILoffs); + valTyp = lvaGetRealType(val->gtLclVar.gtLclNum); + val->gtType = valTyp; } var_types dstTyp = varDsc->TypeGet(); @@ -14108,7 +14493,7 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr, // effect of this instruction, change it into a GT_LOCKADD node (the add only) if (oper == GT_XADD) { - expr->gtOper = GT_LOCKADD; + expr->SetOperRaw(GT_LOCKADD); expr->gtType = TYP_VOID; } @@ -14188,12 +14573,12 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr, GenTreePtr args; for (args = expr->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2) { - assert(args->IsList()); + assert(args->OperIsList()); gtExtractSideEffList(args->Current(), pList, flags); } for (args = expr->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2) { - assert(args->IsList()); + assert(args->OperIsList()); gtExtractSideEffList(args->Current(), pList, flags); } } @@ -15356,11 +15741,18 @@ bool GenTree::isContained() const return false; } + // these either produce a result in register or set flags reg. + if (IsSIMDEqualityOrInequality()) + { + return false; + } + // TODO-Cleanup : this is not clean, would be nice to have some way of marking this. switch (OperGet()) { case GT_STOREIND: case GT_JTRUE: + case GT_JCC: case GT_RETURN: case GT_RETFILT: case GT_STORE_LCL_FLD: @@ -15381,7 +15773,9 @@ bool GenTree::isContained() const case GT_STORE_OBJ: case GT_STORE_DYN_BLK: case GT_SWITCH: +#ifndef LEGACY_BACKEND case GT_JMPTABLE: +#endif case GT_SWITCH_TABLE: case GT_SWAP: case GT_LCLHEAP: @@ -15928,6 +16322,17 @@ void GenTree::ParseArrayAddress( // TODO-Review: A NotAField here indicates a failure to properly maintain the field sequence // See test case self_host_tests_x86\jit\regression\CLR-x86-JIT\v1-m12-beta2\ b70992\ b70992.exe // Safest thing to do here is to drop back to MinOpts + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef DEBUG + if (comp->opts.optRepeat) + { + // We don't guarantee preserving these annotations through the entire optimizer, so + // just conservatively return null if under optRepeat. + *pArr = nullptr; + return; + } +#endif // DEBUG noway_assert(!"fldSeqIter is NotAField() in ParseArrayAddress"); } @@ -16446,24 +16851,6 @@ bool GenTree::isCommutativeSIMDIntrinsic() #endif // FEATURE_SIMD //--------------------------------------------------------------------------------------- -// GenTreeArgList::Prepend: -// Prepends an element to a GT_LIST. -// -// Arguments: -// compiler - The compiler context. -// element - The element to prepend. -// -// Returns: -// The new head of the list. 
-GenTreeArgList* GenTreeArgList::Prepend(Compiler* compiler, GenTree* element) -{ - GenTreeArgList* head = compiler->gtNewListNode(element, this); - head->gtFlags |= (gtFlags & GTF_LIST_AGGREGATE); - gtFlags &= ~GTF_LIST_AGGREGATE; - return head; -} - -//--------------------------------------------------------------------------------------- // InitializeStructReturnType: // Initialize the Return Type Descriptor for a method that returns a struct type // diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 4efeeae620..4611d35465 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -68,7 +68,7 @@ enum SpecialCodeKind DECLARE_TYPED_ENUM(genTreeOps, BYTE) { -#define GTNODE(en, sn, cm, ok) GT_##en, +#define GTNODE(en, sn, st, cm, ok) GT_##en, #include "gtlist.h" GT_COUNT, @@ -429,13 +429,15 @@ struct GenTree noway_assert(FitsIn<unsigned char>(level)); gtFPlvl = (unsigned char)level; } -#else // FEATURE_STACK_FP_X87 +#else // FEATURE_STACK_FP_X87 + void gtCopyFPlvl(GenTree* other) { } void gtSetFPlvl(unsigned level) { } + #endif // FEATURE_STACK_FP_X87 // @@ -564,7 +566,7 @@ public: bool isContainedIntOrIImmed() const { - return isContained() && IsCnsIntOrI(); + return isContained() && IsCnsIntOrI() && !isContainedSpillTemp(); } bool isContainedFltOrDblImmed() const @@ -766,15 +768,15 @@ public: #ifdef LEGACY_BACKEND #define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled #define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled -#else +#else // !LEGACY_BACKEND #define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use -#endif // LEGACY_BACKEND +#endif // !LEGACY_BACKEND #define GTF_ZSF_SET 0x00000400 // the zero(ZF) and sign(SF) flags set to the operand -#if FEATURE_SET_FLAGS + #define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags // Use gtSetFlags() to check this flags -#endif + #define GTF_IND_NONFAULTING 0x00000800 // An indir that cannot fault. GTF_SET_FLAGS is not used on indirs #define GTF_MAKE_CSE 0x00002000 // Hoisted Expression: try hard to make this into CSE (see optPerformHoistExpr) @@ -865,12 +867,18 @@ public: #define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS #define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an // assignment; don't evaluate it independently. -#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case - // alignment of 1 byte) -#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection) -#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF - // contribution to its argument). -#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index +#define GTF_IND_REQ_ADDR_IN_REG GTF_IND_ASG_LHS // GT_IND -- requires its addr operand to be evaluated + // into a register. This flag is useful in cases where it + // is required to generate register indirect addressing mode. + // One such case is virtual stub calls on xarch. This is only + // valid in the backend, where GTF_IND_ASG_LHS is not necessary + // (all such indirections will be lowered to GT_STOREIND). 
+#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case + // alignment of 1 byte) +#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection) +#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF + // contribution to its argument). +#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index #define GTF_IND_FLAGS \ (GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \ @@ -925,11 +933,12 @@ public: #define GTF_ICON_FIELD_OFF 0x08000000 // GT_CNS_INT -- constant is a field offset +#define GTF_ICON_SIMD_COUNT 0x04000000 // GT_CNS_INT -- constant is Vector<T>.Count + #define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK // -- is a volatile block operation #define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK // -- is an unaligned block operation -#define GTF_BLK_INIT 0x01000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK -- is an init block operation #define GTF_OVERFLOW 0x10000000 // GT_ADD, GT_SUB, GT_MUL, - Need overflow check // GT_ASG_ADD, GT_ASG_SUB, @@ -942,10 +951,13 @@ public: #define GTF_ARRLEN_ARR_IDX 0x80000000 // GT_ARR_LENGTH -- Length which feeds into an array index expression -#define GTF_LIST_AGGREGATE 0x80000000 // GT_LIST -- Indicates that this list should be treated as an - // anonymous aggregate value (e.g. a multi-value argument). +#define GTF_FIELD_LIST_HEAD 0x80000000 // GT_FIELD_LIST -- Indicates that this is the first field in a list of + // struct fields constituting a single call argument. //---------------------------------------------------------------- +#define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12 + // even if they have been retyped as SIMD16. +//---------------------------------------------------------------- #define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler #define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was subsituted @@ -958,8 +970,10 @@ public: #define GTF_DEBUG_NODE_MORPHED 0x00000001 // the node has been morphed (in the global morphing phase) #define GTF_DEBUG_NODE_SMALL 0x00000002 #define GTF_DEBUG_NODE_LARGE 0x00000004 +#define GTF_DEBUG_NODE_CG_PRODUCED 0x00000008 // genProduceReg has been called on this node +#define GTF_DEBUG_NODE_CG_CONSUMED 0x00000010 // genConsumeReg has been called on this node -#define GTF_DEBUG_NODE_MASK 0x00000007 // These flags are all node (rather than operation) properties. +#define GTF_DEBUG_NODE_MASK 0x0000001F // These flags are all node (rather than operation) properties. 
#define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node #endif // defined(DEBUG) @@ -970,6 +984,8 @@ public: #ifdef DEBUG unsigned gtTreeID; unsigned gtSeqNum; // liveness traversal order within the current statement + + int gtUseNum; // use-ordered traversal within the function #endif static const unsigned short gtOperKindTable[]; @@ -1011,9 +1027,9 @@ public: return gtType != TYP_VOID; } - if (gtOper == GT_LIST) + if (gtOper == GT_FIELD_LIST) { - return (gtFlags & GTF_LIST_AGGREGATE) != 0; + return (gtFlags & GTF_FIELD_LIST_HEAD) != 0; } return true; @@ -1033,14 +1049,14 @@ public: return IsNothingNode(); case GT_ARGPLACE: - // ARGPLACE nodes may not be present in a block's LIR sequence, but they may + case GT_LIST: + // ARGPLACE and LIST nodes may not be present in a block's LIR sequence, but they may // be present as children of an LIR node. return (gtNext == nullptr) && (gtPrev == nullptr); - case GT_LIST: - // LIST nodes may only be present in an LIR sequence if they represent aggregates. - // They are always allowed, however, as children of an LIR node. - return ((gtFlags & GTF_LIST_AGGREGATE) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr)); + case GT_FIELD_LIST: + // Only the head of the FIELD_LIST is present in the block's LIR sequence. + return (((gtFlags & GTF_FIELD_LIST_HEAD) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr))); case GT_ADDR: { @@ -1130,6 +1146,21 @@ public: return (gtOper == GT_LEA); } + static bool OperIsInitVal(genTreeOps gtOper) + { + return (gtOper == GT_INIT_VAL); + } + + bool OperIsInitVal() const + { + return OperIsInitVal(OperGet()); + } + + bool IsConstInitVal() + { + return (gtOper == GT_CNS_INT) || (OperIsInitVal() && (gtGetOp1()->gtOper == GT_CNS_INT)); + } + bool OperIsBlkOp(); bool OperIsCopyBlkOp(); bool OperIsInitBlkOp(); @@ -1146,6 +1177,16 @@ public: return OperIsBlk(OperGet()); } + static bool OperIsDynBlk(genTreeOps gtOper) + { + return ((gtOper == GT_DYN_BLK) || (gtOper == GT_STORE_DYN_BLK)); + } + + bool OperIsDynBlk() const + { + return OperIsDynBlk(OperGet()); + } + static bool OperIsStoreBlk(genTreeOps gtOper) { return ((gtOper == GT_STORE_BLK) || (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK)); @@ -1206,7 +1247,7 @@ public: return OperIsLocalRead(OperGet()); } - bool OperIsCompare() + bool OperIsCompare() const { return (OperKind(gtOper) & GTK_RELOP) != 0; } @@ -1270,7 +1311,6 @@ public: { case GT_ADD_HI: case GT_SUB_HI: - case GT_MUL_HI: case GT_DIV_HI: case GT_MOD_HI: return true; @@ -1396,8 +1436,7 @@ public: static bool OperIsStore(genTreeOps gtOper) { return (gtOper == GT_STOREIND || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD || - gtOper == GT_STORE_CLS_VAR || gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ || - gtOper == GT_STORE_DYN_BLK); + gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ || gtOper == GT_STORE_DYN_BLK); } static bool OperIsAtomicOp(genTreeOps gtOper) @@ -1425,9 +1464,34 @@ public: return OperIsSIMD(gtOper); } - bool OperIsAggregate() + bool OperIsFieldListHead() + { + return (gtOper == GT_FIELD_LIST) && ((gtFlags & GTF_FIELD_LIST_HEAD) != 0); + } + + bool OperIsConditionalJump() const + { + return (gtOper == GT_JTRUE) || (gtOper == GT_JCC); + } + + static bool OperIsBoundsCheck(genTreeOps op) + { + if (op == GT_ARR_BOUNDS_CHECK) + { + return true; + } +#ifdef FEATURE_SIMD + if (op == GT_SIMD_CHK) + { + return true; + } +#endif // FEATURE_SIMD + return false; + } + + bool OperIsBoundsCheck() const { - return (gtOper == GT_LIST) && ((gtFlags & 
GTF_LIST_AGGREGATE) != 0); + return OperIsBoundsCheck(OperGet()); } // Requires that "op" is an op= operator. Returns @@ -1462,6 +1526,7 @@ public: switch (gtOper) { case GT_LIST: + case GT_FIELD_LIST: case GT_INTRINSIC: case GT_LEA: #ifdef FEATURE_SIMD @@ -1474,19 +1539,47 @@ public: } static inline bool RequiresNonNullOp2(genTreeOps oper); - bool IsListForMultiRegArg(); + bool IsValidCallArgument(); #endif // DEBUG inline bool IsFPZero(); inline bool IsIntegralConst(ssize_t constVal); + inline bool IsIntegralConstVector(ssize_t constVal); inline bool IsBoxedValue(); - bool IsList() const + inline bool IsSIMDEqualityOrInequality() const; + + static bool OperIsList(genTreeOps gtOper) { return gtOper == GT_LIST; } + bool OperIsList() const + { + return OperIsList(gtOper); + } + + static bool OperIsFieldList(genTreeOps gtOper) + { + return gtOper == GT_FIELD_LIST; + } + + bool OperIsFieldList() const + { + return OperIsFieldList(gtOper); + } + + static bool OperIsAnyList(genTreeOps gtOper) + { + return OperIsList(gtOper) || OperIsFieldList(gtOper); + } + + bool OperIsAnyList() const + { + return OperIsAnyList(gtOper); + } + inline GenTreePtr MoveNext(); inline GenTreePtr Current(); @@ -1508,6 +1601,8 @@ public: // Get the parent of this node, and optionally capture the pointer to the child so that it can be modified. GenTreePtr gtGetParent(GenTreePtr** parentChildPtrPtr); + void ReplaceOperand(GenTree** useEdge, GenTree* replacement); + inline GenTreePtr gtEffectiveVal(bool commaOnly = false); // Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself @@ -1536,7 +1631,13 @@ public: public: #if SMALL_TREE_NODES static unsigned char s_gtNodeSizes[]; +#if NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS + static unsigned char s_gtTrueSizes[]; +#endif +#if COUNT_AST_OPERS + static LONG s_gtNodeCounts[]; #endif +#endif // SMALL_TREE_NODES static void InitNodeSize(); @@ -1555,15 +1656,19 @@ public: static bool Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK = false); //--------------------------------------------------------------------- -#ifdef DEBUG - //--------------------------------------------------------------------- +#if defined(DEBUG) static const char* NodeName(genTreeOps op); +#endif +#if defined(DEBUG) || NODEBASH_STATS || MEASURE_NODE_SIZE || COUNT_AST_OPERS static const char* OpName(genTreeOps op); +#endif -//--------------------------------------------------------------------- +#if MEASURE_NODE_SIZE && SMALL_TREE_NODES + static const char* OpStructName(genTreeOps op); #endif + //--------------------------------------------------------------------- bool IsNothingNode() const; @@ -1583,6 +1688,7 @@ public: // set gtOper and only keep GTF_COMMON_MASK flags void ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN); void ChangeOperUnchecked(genTreeOps oper); + void SetOperRaw(genTreeOps oper); void ChangeType(var_types newType) { @@ -1597,6 +1703,20 @@ public: } } +#if SMALL_TREE_NODES +#if NODEBASH_STATS + static void RecordOperBashing(genTreeOps operOld, genTreeOps operNew); + static void ReportOperBashing(FILE* fp); +#else + static void RecordOperBashing(genTreeOps operOld, genTreeOps operNew) + { /* do nothing */ + } + static void ReportOperBashing(FILE* fp) + { /* do nothing */ + } +#endif +#endif + bool IsLocal() const { return OperIsLocal(OperGet()); @@ -1777,6 +1897,14 @@ public: bool gtOverflowEx() const; bool gtSetFlags() const; bool gtRequestSetFlags(); + + // Returns true if the codegen of this tree node + 
// sets ZF and SF flags. + bool gtSetZSFlags() const + { + return (gtFlags & GTF_ZSF_SET) != 0; + } + #ifdef DEBUG bool gtIsValid64RsltMul(); static int gtDispFlags(unsigned flags, unsigned debugFlags); @@ -1827,10 +1955,10 @@ public: // Returns an iterator that will produce the use edge to each operand of this node. Differs // from the sequence of nodes produced by a loop over `GetChild` in its handling of call, phi, // and block op nodes. - GenTreeUseEdgeIterator GenTree::UseEdgesBegin(); - GenTreeUseEdgeIterator GenTree::UseEdgesEnd(); + GenTreeUseEdgeIterator UseEdgesBegin(); + GenTreeUseEdgeIterator UseEdgesEnd(); - IteratorPair<GenTreeUseEdgeIterator> GenTree::UseEdges(); + IteratorPair<GenTreeUseEdgeIterator> UseEdges(); // Returns an iterator that will produce each operand of this node. Differs from the sequence // of nodes produced by a loop over `GetChild` in its handling of call, phi, and block op @@ -1866,6 +1994,10 @@ public: gtFlags &= ~GTF_REUSE_REG_VAL; } +#if MEASURE_NODE_SIZE + static void DumpNodeSizes(FILE* fp); +#endif + #ifdef DEBUG private: @@ -1931,7 +2063,7 @@ class GenTreeUseEdgeIterator final #ifdef FEATURE_SIMD void MoveToNextSIMDUseEdge(); #endif - void MoveToNextAggregateUseEdge(); + void MoveToNextFieldUseEdge(); public: GenTreeUseEdgeIterator(); @@ -2128,7 +2260,7 @@ struct GenTreeIntConCommon : public GenTree } bool ImmedValNeedsReloc(Compiler* comp); - bool GenTreeIntConCommon::ImmedValCanBeFolded(Compiler* comp, genTreeOps op); + bool ImmedValCanBeFolded(Compiler* comp, genTreeOps op); #ifdef _TARGET_XARCH_ bool FitsInAddrBase(Compiler* comp); @@ -2629,18 +2761,13 @@ struct GenTreeField : public GenTree // method names for the arguments. struct GenTreeArgList : public GenTreeOp { - bool IsAggregate() const - { - return (gtFlags & GTF_LIST_AGGREGATE) != 0; - } - GenTreePtr& Current() { return gtOp1; } GenTreeArgList*& Rest() { - assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_LIST); + assert(gtOp2 == nullptr || gtOp2->OperIsAnyList()); return *reinterpret_cast<GenTreeArgList**>(>Op2); } @@ -2654,20 +2781,68 @@ struct GenTreeArgList : public GenTreeOp { } - GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(GT_LIST, TYP_VOID, arg, rest) + GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeArgList(GT_LIST, arg, rest) { - // With structs passed in multiple args we could have an arg - // GT_LIST containing a list of LCL_FLDs, see IsListForMultiRegArg() - // - assert((arg != nullptr) && ((!arg->IsList()) || (arg->IsListForMultiRegArg()))); + } + + GenTreeArgList(genTreeOps oper, GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(oper, TYP_VOID, arg, rest) + { + assert(OperIsAnyList(oper)); + assert((arg != nullptr) && arg->IsValidCallArgument()); gtFlags |= arg->gtFlags & GTF_ALL_EFFECT; if (rest != nullptr) { gtFlags |= rest->gtFlags & GTF_ALL_EFFECT; } } +}; + +// Represents a list of fields constituting a struct, when it is passed as an argument. +// The first field of the struct is marked with the GTF_FIELD_LIST_HEAD flag, and +// in LIR form it is the only member of the list that is threaded into the execution +// order. +// It differs from the GenTreeArgList in a couple of ways: +// - The entire list represents a single argument. +// - It contains additional fields to provide the offset and type of the field. 
+// +struct GenTreeFieldList : public GenTreeArgList +{ + unsigned gtFieldOffset; + var_types gtFieldType; + + bool IsFieldListHead() const + { + return (gtFlags & GTF_FIELD_LIST_HEAD) != 0; + } - GenTreeArgList* Prepend(Compiler* compiler, GenTree* element); +#if DEBUGGABLE_GENTREE + GenTreeFieldList() : GenTreeArgList() + { + } +#endif + + GenTreeFieldList*& Rest() + { + assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_FIELD_LIST); + return *reinterpret_cast<GenTreeFieldList**>(>Op2); + } + + GenTreeFieldList(GenTreePtr arg, unsigned fieldOffset, var_types fieldType, GenTreeFieldList* prevList) + : GenTreeArgList(GT_FIELD_LIST, arg, nullptr) + { + // While GT_FIELD_LIST can be in a GT_LIST, GT_FIELD_LISTs cannot be nested or have GT_LISTs. + assert(!arg->OperIsAnyList()); + gtFieldOffset = fieldOffset; + gtFieldType = fieldType; + if (prevList == nullptr) + { + gtFlags |= GTF_FIELD_LIST_HEAD; + } + else + { + prevList->gtOp2 = this; + } + } }; // There was quite a bit of confusion in the code base about which of gtOp1 and gtOp2 was the @@ -3360,8 +3535,13 @@ struct GenTreeCall final : public GenTree bool IsHelperCall(Compiler* compiler, unsigned helper) const; + void ReplaceCallOperand(GenTree** operandUseEdge, GenTree* replacement); + + bool AreArgsComplete() const; + GenTreeCall(var_types type) : GenTree(GT_CALL, type) { + fgArgInfo = nullptr; } #if DEBUGGABLE_GENTREE GenTreeCall() : GenTree() @@ -4017,6 +4197,19 @@ struct GenTreeObj : public GenTreeBlk // Let's assert it just to be safe. noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize); } + else + { + genTreeOps newOper = GT_BLK; + if (gtOper == GT_STORE_OBJ) + { + newOper = GT_STORE_BLK; + } + else + { + assert(gtOper == GT_OBJ); + } + SetOper(newOper); + } } void CopyGCInfo(GenTreeObj* srcObj) @@ -4068,6 +4261,8 @@ public: GenTreeDynBlk(GenTreePtr addr, GenTreePtr dynamicSize) : GenTreeBlk(GT_DYN_BLK, TYP_STRUCT, addr, 0), gtDynamicSize(dynamicSize), gtEvalSizeFirst(false) { + // Conservatively the 'addr' could be null or point into the global heap. + gtFlags |= GTF_EXCEPT | GTF_GLOB_REF; gtFlags |= (dynamicSize->gtFlags & GTF_ALL_EFFECT); } @@ -4198,10 +4393,7 @@ struct GenTreeStmt : public GenTree GenTreePtr gtStmtExpr; // root of the expression tree GenTreePtr gtStmtList; // first node (for forward walks) InlineContext* gtInlineContext; // The inline context for this statement. 
- -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) - IL_OFFSETX gtStmtILoffsx; // instr offset (if available) -#endif + IL_OFFSETX gtStmtILoffsx; // instr offset (if available) #ifdef DEBUG IL_OFFSET gtStmtLastILoffs; // instr offset at end of stmt @@ -4240,9 +4432,7 @@ struct GenTreeStmt : public GenTree , gtStmtExpr(expr) , gtStmtList(nullptr) , gtInlineContext(nullptr) -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) , gtStmtILoffsx(offset) -#endif #ifdef DEBUG , gtStmtLastILoffs(BAD_IL_OFFSET) #endif @@ -4350,20 +4540,19 @@ struct GenTreePutArgStk : public GenTreeUnOp GenTreePutArgStk(genTreeOps oper, var_types type, - unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots) + PUT_STRUCT_ARG_STK_ONLY_ARG(bool isStruct), bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type DEBUGARG(largeNode)) , gtSlotNum(slotNum) , putInIncomingArgArea(_putInIncomingArgArea) -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - , gtPutArgStkKind(PutArgStkKindInvalid) +#ifdef FEATURE_PUT_STRUCT_ARG_STK + , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) - , gtIsStruct(isStruct) , gtNumberReferenceSlots(0) , gtGcPtrs(nullptr) -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK { #ifdef DEBUG gtCall = callNode; @@ -4373,20 +4562,18 @@ struct GenTreePutArgStk : public GenTreeUnOp GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, - unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct), + unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots), bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)) , gtSlotNum(slotNum) , putInIncomingArgArea(_putInIncomingArgArea) -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - , gtPutArgStkKind(PutArgStkKindInvalid) +#ifdef FEATURE_PUT_STRUCT_ARG_STK + , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) - , gtIsStruct(isStruct) , gtNumberReferenceSlots(0) , gtGcPtrs(nullptr) -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK { #ifdef DEBUG gtCall = callNode; @@ -4397,18 +4584,16 @@ struct GenTreePutArgStk : public GenTreeUnOp GenTreePutArgStk(genTreeOps oper, var_types type, - unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL) - DEBUGARG(bool largeNode = false)) + unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots) + DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type DEBUGARG(largeNode)) , gtSlotNum(slotNum) -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - , gtPutArgStkKind(PutArgStkKindInvalid) +#ifdef FEATURE_PUT_STRUCT_ARG_STK + , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) - , gtIsStruct(isStruct) , gtNumberReferenceSlots(0) , gtGcPtrs(nullptr) -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK { #ifdef DEBUG gtCall = callNode; @@ -4418,18 +4603,16 @@ struct GenTreePutArgStk : public GenTreeUnOp GenTreePutArgStk(genTreeOps oper, var_types type, GenTreePtr op1, - unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots) - 
FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL) - DEBUGARG(bool largeNode = false)) + unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots) + DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false)) : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)) , gtSlotNum(slotNum) -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - , gtPutArgStkKind(PutArgStkKindInvalid) +#ifdef FEATURE_PUT_STRUCT_ARG_STK + , gtPutArgStkKind(Kind::Invalid) , gtNumSlots(numSlots) - , gtIsStruct(isStruct) , gtNumberReferenceSlots(0) , gtGcPtrs(nullptr) -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK { #ifdef DEBUG gtCall = callNode; @@ -4442,14 +4625,14 @@ struct GenTreePutArgStk : public GenTreeUnOp return gtSlotNum * TARGET_POINTER_SIZE; } -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK unsigned getArgSize() { return gtNumSlots * TARGET_POINTER_SIZE; } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM. // @@ -4471,27 +4654,32 @@ struct GenTreePutArgStk : public GenTreeUnOp gtNumberReferenceSlots = numPointers; gtGcPtrs = pointers; } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef DEBUG GenTreePtr gtCall; // the call node to which this argument belongs #endif -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK // Instruction selection: during codegen time, what code sequence we will be using // to encode this operation. + // TODO-Throughput: The following information should be obtained from the child + // block node. - enum PutArgStkKind : __int8{ - PutArgStkKindInvalid, PutArgStkKindRepInstr, PutArgStkKindUnroll, + enum class Kind : __int8{ + Invalid, RepInstr, Unroll, Push, PushAllSlots, }; - PutArgStkKind gtPutArgStkKind; + Kind gtPutArgStkKind; + bool isPushKind() + { + return (gtPutArgStkKind == Kind::Push) || (gtPutArgStkKind == Kind::PushAllSlots); + } unsigned gtNumSlots; // Number of slots for the argument to be passed on stack - bool gtIsStruct; // This stack arg is a struct. unsigned gtNumberReferenceSlots; // Number of reference slots. BYTE* gtGcPtrs; // gcPointers -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK #if DEBUGGABLE_GENTREE GenTreePutArgStk() : GenTreeUnOp() @@ -4644,6 +4832,23 @@ struct GenTreeAllocObj final : public GenTreeUnOp #endif }; +struct GenTreeJumpCC final : public GenTree +{ + genTreeOps gtCondition; // any relop + + GenTreeJumpCC(genTreeOps condition) + : GenTree(GT_JCC, TYP_VOID DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition) + { + assert(OperIsCompare(condition)); + } + +#if DEBUGGABLE_GENTREE + GenTreeJumpCC() : GenTree() + { + } +#endif // DEBUGGABLE_GENTREE +}; + //------------------------------------------------------------------------ // Deferred inline functions of GenTree -- these need the subtypes above to // be defined already. 
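Note on the block-op classification rework in the next hunk: with GTF_BLK_INIT removed from the flag definitions above, init versus copy block stores are no longer distinguished by a node flag. The kind is instead derived from the store's source operand, using the OperIsInitVal/IsConstInitVal helpers introduced earlier in this header. A minimal sketch of that decision, paraphrasing the RyuJIT (non-LEGACY_BACKEND) path shown below; the free-standing helper name IsInitBlock is illustrative only and is not part of the change:

    // Classify a block assignment: an init block stores a constant (possibly wrapped
    // in GT_INIT_VAL); anything else that OperIsBlkOp() accepts is a copy block.
    bool IsInitBlock(GenTree* node)
    {
        if (!node->OperIsBlkOp())
        {
            return false; // only struct GT_ASG and GT_STORE_BLK/OBJ/DYN_BLK qualify
        }
        GenTree* src = (node->OperGet() == GT_ASG) ? node->gtGetOp2()
                                                   : node->AsBlk()->Data()->gtSkipReloadOrCopy();
        return src->OperIsInitVal() || src->OperIsConst();
    }

OperIsCopyBlkOp then reduces to OperIsBlkOp() && !OperIsInitBlkOp(), which is exactly the shape of the rewritten inline functions in the hunk that follows.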
@@ -4673,34 +4878,31 @@ inline bool GenTree::OperIsDynBlkOp() return false; } -inline bool GenTree::OperIsCopyBlkOp() +inline bool GenTree::OperIsInitBlkOp() { - if (gtOper == GT_ASG) + if (!OperIsBlkOp()) { - return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) == 0)); + return false; } #ifndef LEGACY_BACKEND - else if (OperIsStoreBlk()) - { - return ((gtFlags & GTF_BLK_INIT) == 0); - } -#endif - return false; -} - -inline bool GenTree::OperIsInitBlkOp() -{ + GenTree* src; if (gtOper == GT_ASG) { - return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) != 0)); + src = gtGetOp2(); } -#ifndef LEGACY_BACKEND - else if (OperIsStoreBlk()) + else { - return ((gtFlags & GTF_BLK_INIT) != 0); + src = AsBlk()->Data()->gtSkipReloadOrCopy(); } -#endif - return false; +#else // LEGACY_BACKEND + GenTree* src = gtGetOp2(); +#endif // LEGACY_BACKEND + return src->OperIsInitVal() || src->OperIsConst(); +} + +inline bool GenTree::OperIsCopyBlkOp() +{ + return OperIsBlkOp() && !OperIsInitBlkOp(); } //------------------------------------------------------------------------ @@ -4748,34 +4950,63 @@ inline bool GenTree::IsIntegralConst(ssize_t constVal) return false; } +//------------------------------------------------------------------- +// IsIntegralConstVector: returns true if this this is a SIMD vector +// with all its elements equal to an integral constant. +// +// Arguments: +// constVal - const value of vector element +// +// Returns: +// True if this represents an integral const SIMD vector. +// +inline bool GenTree::IsIntegralConstVector(ssize_t constVal) +{ +#ifdef FEATURE_SIMD + // SIMDIntrinsicInit intrinsic with a const value as initializer + // represents a const vector. + if ((gtOper == GT_SIMD) && (gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInit) && gtGetOp1()->IsIntegralConst(constVal)) + { + assert(varTypeIsIntegral(gtSIMD.gtSIMDBaseType)); + assert(gtGetOp2() == nullptr); + return true; + } +#endif + + return false; +} + inline bool GenTree::IsBoxedValue() { assert(gtOper != GT_BOX || gtBox.BoxOp() != nullptr); return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); } +inline bool GenTree::IsSIMDEqualityOrInequality() const +{ +#ifdef FEATURE_SIMD + if (gtOper == GT_SIMD) + { + // Has to cast away const-ness since AsSIMD() method is non-const. + GenTreeSIMD* simdNode = const_cast<GenTree*>(this)->AsSIMD(); + return (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality || + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); + } +#endif + + return false; +} + inline GenTreePtr GenTree::MoveNext() { - assert(IsList()); + assert(OperIsAnyList()); return gtOp.gtOp2; } #ifdef DEBUG //------------------------------------------------------------------------ -// IsListForMultiRegArg: Given an GenTree node that represents an argument -// enforce (or don't enforce) the following invariant. -// -// For LEGACY_BACKEND or architectures that don't support MultiReg args -// we don't allow a GT_LIST at all. -// -// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is -// allowed but every element must be a GT_LCL_FLD. -// -// For the future targets that allow for Multireg args (and this includes -// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these -// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, -// but could be changed into constants or GT_COMMA trees by the later -// optimization phases. +// IsValidCallArgument: Given an GenTree node that represents an argument +// enforce (or don't enforce) the following invariant. 
// // Arguments: // instance method for a GenTree node @@ -4784,33 +5015,46 @@ inline GenTreePtr GenTree::MoveNext() // true: the GenTree node is accepted as a valid argument // false: the GenTree node is not accepted as a valid argumeny // -inline bool GenTree::IsListForMultiRegArg() +// Notes: +// For targets that don't support arguments as a list of fields, we do not support GT_FIELD_LIST. +// +// Currently for AMD64 UNIX we allow a limited case where a GT_FIELD_LIST is +// allowed but every element must be a GT_LCL_FLD. +// +// For the future targets that allow for Multireg args (and this includes the current ARM64 target), +// or that allow for passing promoted structs, we allow a GT_FIELD_LIST of arbitrary nodes. +// These would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs, +// but could be changed into constants or GT_COMMA trees by the later +// optimization phases. + +inline bool GenTree::IsValidCallArgument() { - if (!IsList()) + if (OperIsList()) { - // We don't have a GT_LIST, so just return true. - return true; + // GT_FIELD_LIST is the only list allowed. + return false; } - else // We do have a GT_LIST + if (OperIsFieldList()) { -#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS - - // Not allowed to have a GT_LIST for an argument - // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS +#if defined(LEGACY_BACKEND) || (!FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK) + // Not allowed to have a GT_FIELD_LIST for an argument + // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK return false; -#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS +#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes + // For UNIX ABI we currently only allow a GT_FIELD_LIST of GT_LCL_FLDs nodes GenTree* gtListPtr = this; while (gtListPtr != nullptr) { // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List // Note the list as currently created is malformed, as the last entry is a nullptr if (gtListPtr->Current() == nullptr) + { break; + } // Only a list of GT_LCL_FLDs is allowed if (gtListPtr->Current()->OperGet() != GT_LCL_FLD) @@ -4821,25 +5065,27 @@ inline bool GenTree::IsListForMultiRegArg() } #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - // Note that for non-UNIX ABI the GT_LIST may contain any node + // Note that for non-UNIX ABI the GT_FIELD_LIST may contain any node // - // We allow this GT_LIST as an argument + // We allow this GT_FIELD_LIST as an argument return true; -#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS +#endif // FEATURE_MULTIREG_ARGS } + // We don't have either kind of list, so it satisfies the invariant. 
+ return true; } #endif // DEBUG inline GenTreePtr GenTree::Current() { - assert(IsList()); + assert(OperIsAnyList()); return gtOp.gtOp1; } inline GenTreePtr* GenTree::pCurrent() { - assert(IsList()); + assert(OperIsAnyList()); return &(gtOp.gtOp1); } @@ -4917,23 +5163,22 @@ inline GenTreePtr GenTree::gtGetOp2() inline GenTreePtr GenTree::gtEffectiveVal(bool commaOnly) { - switch (gtOper) + GenTree* effectiveVal = this; + for (;;) { - case GT_COMMA: - return gtOp.gtOp2->gtEffectiveVal(commaOnly); - - case GT_NOP: - if (!commaOnly && gtOp.gtOp1 != nullptr) - { - return gtOp.gtOp1->gtEffectiveVal(); - } - break; - - default: - break; + if (effectiveVal->gtOper == GT_COMMA) + { + effectiveVal = effectiveVal->gtOp.gtOp2; + } + else if (!commaOnly && (effectiveVal->gtOper == GT_NOP) && (effectiveVal->gtOp.gtOp1 != nullptr)) + { + effectiveVal = effectiveVal->gtOp.gtOp1; + } + else + { + return effectiveVal; + } } - - return this; } inline GenTree* GenTree::gtSkipReloadOrCopy() diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp index 43cbb892e9..9255d8fd36 100644 --- a/src/jit/gschecks.cpp +++ b/src/jit/gschecks.cpp @@ -40,9 +40,9 @@ const unsigned NO_SHADOW_COPY = UINT_MAX; * The current function has an unsafe buffer on the stack. Search for vulnerable * parameters which could be used to modify a code address and take over the process * in the case of a buffer overrun. Create a safe local copy for each vulnerable parameter, - * which will be allocated bellow the unsafe buffer. Change uses of the param to the + * which will be allocated bellow the unsafe buffer. Change uses of the param to the * shadow copy. - * + * * A pointer under indirection is considered vulnerable. A malicious user could read from * protected memory or write to it. If a parameter is assigned/computed into another variable, * and is a pointer (i.e., under indirection), then we consider the variable to be part of the @@ -58,7 +58,7 @@ void Compiler::gsCopyShadowParams() // Allocate array for shadow param info gsShadowVarInfo = new (this, CMK_Unknown) ShadowParamVarInfo[lvaCount](); - // Find groups of variables assigned to each other, and also + // Find groups of variables assigned to each other, and also // tracks variables which are dereferenced and marks them as ptrs. 
// Look for assignments to *p, and ptrs passed to functions if (gsFindVulnerableParams()) @@ -83,7 +83,7 @@ struct MarkPtrsInfo { printf( "[MarkPtrsInfo] = {comp = %p, lvAssignDef = %d, isAssignSrc = %d, isUnderIndir = %d, skipNextNode = %d}\n", - comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode); + comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode); } #endif }; @@ -129,7 +129,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg newState.isUnderIndir = true; { newState.skipNextNode = true; // Don't have to worry about which kind of node we're dealing with - comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void *)&newState); + comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void*)&newState); } return WALK_SKIP_SUBTREES; @@ -160,50 +160,50 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg { shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectSet(lclNum); } - + // Point both to the same bit vector shadowVarInfo[lclNum].assignGroup = shadowVarInfo[pState->lvAssignDef].assignGroup; } else if (shadowVarInfo[lclNum].assignGroup) { shadowVarInfo[lclNum].assignGroup->bitVectSet(pState->lvAssignDef); - + // Point both to the same bit vector shadowVarInfo[pState->lvAssignDef].assignGroup = shadowVarInfo[lclNum].assignGroup; } else { - FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp); + FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp); // (shadowVarInfo[pState->lvAssignDef] == NULL && shadowVarInfo[lclNew] == NULL); // Neither of them has an assign group yet. Make a new one. shadowVarInfo[pState->lvAssignDef].assignGroup = bv; - shadowVarInfo[lclNum].assignGroup = bv; + shadowVarInfo[lclNum].assignGroup = bv; bv->bitVectSet(pState->lvAssignDef); bv->bitVectSet(lclNum); } } return WALK_CONTINUE; - + // Calls - Mark arg variables case GT_CALL: newState.isUnderIndir = false; - newState.isAssignSrc = false; + newState.isAssignSrc = false; { if (tree->gtCall.gtCallObjp) { newState.isUnderIndir = true; - comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState); + comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState); } for (GenTreeArgList* args = tree->gtCall.gtCallArgs; args; args = args->Rest()) { - comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState); + comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState); } for (GenTreeArgList* args = tree->gtCall.gtCallLateArgs; args; args = args->Rest()) { - comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState); + comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState); } if (tree->gtCall.gtCallType == CT_INDIRECT) @@ -213,7 +213,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg // A function pointer is treated like a write-through pointer since // it controls what code gets executed, and so indirectly can cause // a write to memory. - comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState); + comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState); } } return WALK_SKIP_SUBTREES; @@ -223,7 +223,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg // We'll assume p in "**p = " can be vulnerable because by changing 'p', someone // could control where **p stores to. 
{ - comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState); + comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState); } return WALK_SKIP_SUBTREES; @@ -251,7 +251,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg { // Walk dst side comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState); - + // Now handle src side isLocVar = tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR; isLocFld = tree->gtOp.gtOp1->OperGet() == GT_LCL_FLD; @@ -262,7 +262,7 @@ Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fg newState.lvAssignDef = lclNum; newState.isAssignSrc = true; } - + comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState); } @@ -377,7 +377,7 @@ bool Compiler::gsFindVulnerableParams() */ void Compiler::gsParamsToShadows() { - // Cache old count since we'll add new variables, and + // Cache old count since we'll add new variables, and // gsShadowVarInfo will not grow to accomodate the new ones. UINT lvaOldCount = lvaCount; @@ -513,7 +513,7 @@ void Compiler::gsParamsToShadows() GenTreePtr src = gtNewLclvNode(shadowVar, lvaTable[shadowVar].TypeGet()); GenTreePtr dst = gtNewLclvNode(lclNum, varDsc->TypeGet()); - + src->gtFlags |= GTF_DONT_CSE; dst->gtFlags |= GTF_DONT_CSE; @@ -530,7 +530,7 @@ void Compiler::gsParamsToShadows() { opAssign = gtNewAssignNode(dst, src); } - + (void)fgInsertStmtNearEnd(block, fgMorphTree(opAssign)); } } @@ -552,8 +552,8 @@ Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalk { asg = tree; // "asg" is the assignment tree. tree = tree->gtOp.gtOp1; // "tree" is the local var tree at the left-hand size of the assignment. - } - + } + if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD) { UINT paramNum = tree->gtLclVarCommon.gtLclNum; @@ -571,7 +571,7 @@ Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalk if (varTypeIsSmall(comp->lvaTable[paramNum].TypeGet())) { tree->gtType = TYP_INT; - if (asg) + if (asg) { // If this is an assignment tree, propagate the type to it as well. asg->gtType = TYP_INT; diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index a03bcfe4b0..92265a7359 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -9,245 +9,270 @@ #endif /*****************************************************************************/ // -// Node enum -// , "Node name" -// ,commutative -// ,operKind +// Node enum +// ,"Node name" +// ,GenTree struct flavor +// ,commutative +// ,operKind -GTNODE(NONE , "<none>" ,0,GTK_SPECIAL) +GTNODE(NONE , "<none>" ,char ,0,GTK_SPECIAL) //----------------------------------------------------------------------------- // Leaf nodes (i.e. 
these nodes have no sub-operands): //----------------------------------------------------------------------------- -GTNODE(LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable -GTNODE(LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable -GTNODE(LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable -GTNODE(LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable -GTNODE(STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable -GTNODE(STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable -GTNODE(CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block -GTNODE(LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target -GTNODE(FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function -GTNODE(RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate +GTNODE(LCL_VAR , "lclVar" ,GenTreeLclVar ,0,GTK_LEAF|GTK_LOCAL) // local variable +GTNODE(LCL_FLD , "lclFld" ,GenTreeLclFld ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable +GTNODE(LCL_VAR_ADDR , "&lclVar" ,GenTreeLclVar ,0,GTK_LEAF) // address of local variable +GTNODE(LCL_FLD_ADDR , "&lclFld" ,GenTreeLclFld ,0,GTK_LEAF) // address of field in a non-primitive variable +GTNODE(STORE_LCL_VAR , "st.lclVar" ,GenTreeLclVar ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable +GTNODE(STORE_LCL_FLD , "st.lclFld" ,GenTreeLclFld ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable +GTNODE(CATCH_ARG , "catchArg" ,GenTree ,0,GTK_LEAF) // Exception object in a catch block +GTNODE(LABEL , "codeLabel" ,GenTreeLabel ,0,GTK_LEAF) // Jump-target +GTNODE(FTN_ADDR , "ftnAddr" ,GenTreeFptrVal ,0,GTK_LEAF) // Address of a function +GTNODE(RET_EXPR , "retExpr" ,GenTreeRetExpr ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate //----------------------------------------------------------------------------- // Constant nodes: //----------------------------------------------------------------------------- -GTNODE(CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST) -GTNODE(CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST) -GTNODE(CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST) -GTNODE(CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_INT , "const" ,GenTreeIntCon ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_LNG , "lconst" ,GenTreeLngCon ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_DBL , "dconst" ,GenTreeDblCon ,0,GTK_LEAF|GTK_CONST) +GTNODE(CNS_STR , "sconst" ,GenTreeStrCon ,0,GTK_LEAF|GTK_CONST) //----------------------------------------------------------------------------- // Unary operators (1 operand): //----------------------------------------------------------------------------- -GTNODE(NOT , "~" ,0,GTK_UNOP) -GTNODE(NOP , "nop" ,0,GTK_UNOP) -GTNODE(NEG , "unary -" ,0,GTK_UNOP) -GTNODE(COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies - // code generation constraints. The child is the actual lclVar node. -GTNODE(RELOAD , "reload" ,0,GTK_UNOP) -GTNODE(CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored. - // Changing to unary presently causes problems, though -- take a little work to fix. 
- -GTNODE(ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length - -GTNODE(INTRINSIC , "intrinsic" ,0,GTK_BINOP|GTK_EXOP) // intrinsics - -GTNODE(LOCKADD , "lockAdd" ,0,GTK_BINOP|GTK_NOVALUE) -GTNODE(XADD , "XAdd" ,0,GTK_BINOP) -GTNODE(XCHG , "Xchg" ,0,GTK_BINOP) -GTNODE(CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL) -GTNODE(MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF|GTK_NOVALUE) - -GTNODE(CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type -GTNODE(CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN -GTNODE(LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca() -GTNODE(JMP , "jump" ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function - - -GTNODE(ADDR , "addr" ,0,GTK_UNOP) // address of -GTNODE(IND , "indir" ,0,GTK_UNOP) // load indirection -GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store indirection - - // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes -GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check -GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. -GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. -GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) -GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) -GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object -GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object -GTNODE(BOX , "box" ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR) +GTNODE(NOT , "~" ,GenTreeOp ,0,GTK_UNOP) +GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP) +GTNODE(NEG , "unary -" ,GenTreeOp ,0,GTK_UNOP) +GTNODE(COPY , "copy" ,GenTreeCopyOrReload,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies + // code generation constraints. The child is the actual lclVar node. +GTNODE(RELOAD , "reload" ,GenTreeCopyOrReload,0,GTK_UNOP) +GTNODE(CHS , "flipsign" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored. + // Changing to unary presently causes problems, though -- take a little work to fix. 
+ +GTNODE(ARR_LENGTH , "arrLen" ,GenTreeArrLen ,0,GTK_UNOP|GTK_EXOP) // array-length + +GTNODE(INTRINSIC , "intrinsic" ,GenTreeIntrinsic ,0,GTK_BINOP|GTK_EXOP) // intrinsics + +GTNODE(LOCKADD , "lockAdd" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) +GTNODE(XADD , "XAdd" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(XCHG , "Xchg" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(CMPXCHG , "cmpxchg" ,GenTreeCmpXchg ,0,GTK_SPECIAL) +GTNODE(MEMORYBARRIER , "memoryBarrier",GenTree ,0,GTK_LEAF|GTK_NOVALUE) + +GTNODE(CAST , "cast" ,GenTreeCast ,0,GTK_UNOP|GTK_EXOP) // conversion to another type +GTNODE(CKFINITE , "ckfinite" ,GenTreeOp ,0,GTK_UNOP) // Check for NaN +GTNODE(LCLHEAP , "lclHeap" ,GenTreeOp ,0,GTK_UNOP) // alloca() +GTNODE(JMP , "jump" ,GenTreeVal ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function + +GTNODE(ADDR , "addr" ,GenTreeOp ,0,GTK_UNOP) // address of +GTNODE(IND , "indir" ,GenTreeOp ,0,GTK_UNOP) // load indirection +GTNODE(STOREIND , "storeIndir" ,GenTreeStoreInd ,0,GTK_BINOP|GTK_NOVALUE) // store indirection + + // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes +GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,GenTreeBoundsChk ,0,GTK_SPECIAL|GTK_NOVALUE)// array bounds check +GTNODE(OBJ , "obj" ,GenTreeObj ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. +GTNODE(STORE_OBJ , "storeObj" ,GenTreeBlk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info. +GTNODE(BLK , "blk" ,GenTreeBlk ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) +GTNODE(STORE_BLK , "storeBlk" ,GenTreeBlk ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields) +GTNODE(DYN_BLK , "DynBlk" ,GenTreeBlk ,0,GTK_SPECIAL) // Dynamically sized block object +GTNODE(STORE_DYN_BLK , "storeDynBlk" ,GenTreeBlk ,0,GTK_SPECIAL|GTK_NOVALUE)// Dynamically sized block object +GTNODE(BOX , "box" ,GenTreeBox ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR) #ifdef FEATURE_SIMD -GTNODE(SIMD_CHK , "simdChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not. - // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll - // need VM assistance for that. - // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that - // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time... +GTNODE(SIMD_CHK , "simdChk" ,GenTreeBoundsChk ,0,GTK_SPECIAL|GTK_NOVALUE)// Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not. + // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll + // need VM assistance for that. + // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that + // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time... 
#endif // FEATURE_SIMD -GTNODE(ALLOCOBJ , "allocObj" ,0,GTK_UNOP|GTK_EXOP) // object allocator +GTNODE(ALLOCOBJ , "allocObj" ,GenTreeAllocObj ,0,GTK_UNOP|GTK_EXOP) // object allocator + +GTNODE(INIT_VAL , "initVal" ,GenTreeOp ,0,GTK_UNOP) // Initialization value for an initBlk //----------------------------------------------------------------------------- // Binary operators (2 operands): //----------------------------------------------------------------------------- -GTNODE(ADD , "+" ,1,GTK_BINOP) -GTNODE(SUB , "-" ,0,GTK_BINOP) -GTNODE(MUL , "*" ,1,GTK_BINOP) -GTNODE(DIV , "/" ,0,GTK_BINOP) -GTNODE(MOD , "%" ,0,GTK_BINOP) +GTNODE(ADD , "+" ,GenTreeOp ,1,GTK_BINOP) +GTNODE(SUB , "-" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(MUL , "*" ,GenTreeOp ,1,GTK_BINOP) +GTNODE(DIV , "/" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(MOD , "%" ,GenTreeOp ,0,GTK_BINOP) -GTNODE(UDIV , "un-/" ,0,GTK_BINOP) -GTNODE(UMOD , "un-%" ,0,GTK_BINOP) +GTNODE(UDIV , "un-/" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(UMOD , "un-%" ,GenTreeOp ,0,GTK_BINOP) -GTNODE(OR , "|" ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(XOR , "^" ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(AND , "&" ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(OR , "|" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(XOR , "^" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP) +GTNODE(AND , "&" ,GenTreeOp ,1,GTK_BINOP|GTK_LOGOP) -GTNODE(LSH , "<<" ,0,GTK_BINOP) -GTNODE(RSH , ">>" ,0,GTK_BINOP) -GTNODE(RSZ , ">>>" ,0,GTK_BINOP) -GTNODE(ROL , "rol" ,0,GTK_BINOP) -GTNODE(ROR , "ror" ,0,GTK_BINOP) -GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) +GTNODE(LSH , "<<" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(RSH , ">>" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(RSZ , ">>>" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(ROL , "rol" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(ROR , "ror" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(MULHI , "mulhi" ,GenTreeOp ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) + // GT_MULHI is used in division by a constant (fgMorphDivByConst). We turn + // the div into a MULHI + some adjustments. In codegen, we only use the + // results of the high register, and we drop the low results. 
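The GT_MULHI comment above describes the classic reciprocal-multiplication strength reduction. A minimal standalone C++ sketch (not part of this commit) of that idea for unsigned division by 3: the high 32 bits of the 32x32->64 product play the role of the GT_MULHI result, and the final shift is the "adjustment" the comment mentions. The helper name is assumed for illustration; 0xAAAAAAAB is the well-known magic constant for this divisor.

    #include <cstdint>

    // Illustrative only: x / 3 computed with a high multiply instead of a divide.
    uint32_t DivideBy3(uint32_t x)
    {
        uint64_t product = static_cast<uint64_t>(x) * 0xAAAAAAABull; // NxN -> 2N multiply
        uint32_t hi      = static_cast<uint32_t>(product >> 32);     // what GT_MULHI keeps
        return hi >> 1;                                              // the adjustment step
    }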
-GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG , "=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_ADD , "+=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_SUB , "-=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_MUL , "*=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_DIV , "/=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_MOD , "%=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_UDIV , "/=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_UMOD , "%=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_OR , "|=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_XOR , "^=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_AND , "&=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_LSH , "<<=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_RSH , ">>=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) +GTNODE(ASG_RSZ , ">>>=" ,GenTreeOp ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) -GTNODE(EQ , "==" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(NE , "!=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(LT , "<" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(LE , "<=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GE , ">=" ,0,GTK_BINOP|GTK_RELOP) -GTNODE(GT , ">" ,0,GTK_BINOP|GTK_RELOP) +GTNODE(EQ , "==" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) +GTNODE(NE , "!=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) +GTNODE(LT , "<" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) +GTNODE(LE , "<=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) +GTNODE(GE , ">=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) +GTNODE(GT , ">" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP) -GTNODE(COMMA , "comma" ,0,GTK_BINOP|GTK_NOTLIR) +GTNODE(COMMA , "comma" ,GenTreeOp ,0,GTK_BINOP|GTK_NOTLIR) -GTNODE(QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) -GTNODE(COLON , "colon" ,0,GTK_BINOP|GTK_NOTLIR) +GTNODE(QMARK , "qmark" ,GenTreeQmark ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) +GTNODE(COLON , "colon" ,GenTreeColon ,0,GTK_BINOP|GTK_NOTLIR) -GTNODE(INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element +GTNODE(INDEX , "[]" ,GenTreeIndex ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element -GTNODE(MKREFANY , "mkrefany" ,0,GTK_BINOP) +GTNODE(MKREFANY , "mkrefany" ,GenTreeOp ,0,GTK_BINOP) -GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP) +GTNODE(LEA , "lea" ,GenTreeAddrMode ,0,GTK_BINOP|GTK_EXOP) #if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) // A GT_LONG node simply represents the long value produced by the concatenation // of its two (lower and upper half) operands. Some GT_LONG nodes are transient, // during the decomposing of longs; others are handled by codegen as operands of // nodes such as calls, returns and stores of long lclVars. 
-GTNODE(LONG , "gt_long" ,0,GTK_BINOP) - -// The following are nodes representing the upper half of a 64-bit operation -// that requires a carry/borrow. However, they are all named GT_XXX_HI for -// consistency. -GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP) -GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP) -GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP) -GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP) -GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP) -GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP) -GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP) +GTNODE(LONG , "gt_long" ,GenTreeOp ,0,GTK_BINOP) + +// The following are nodes representing x86 specific long operators, including +// high operators of a 64-bit operations that requires a carry/borrow, which are +// named GT_XXX_HI for consistency, low operators of 64-bit operations that need +// to not be modified in phases post-decompose, and operators that return 64-bit +// results in one instruction. +GTNODE(ADD_LO , "+Lo" ,GenTreeOp ,1,GTK_BINOP) +GTNODE(ADD_HI , "+Hi" ,GenTreeOp ,1,GTK_BINOP) +GTNODE(SUB_LO , "-Lo" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(SUB_HI , "-Hi" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(DIV_HI , "/Hi" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(MOD_HI , "%Hi" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(MUL_LONG , "*long" ,GenTreeOp ,1,GTK_BINOP) // A mul that returns the 2N bit result of an NxN multiply. This op + // is used for x86 multiplies that take two ints and return a long + // result. All other multiplies with long results are morphed into + // helper calls. It is similar to GT_MULHI, the difference being that + // GT_MULHI drops the lo part of the result, whereas GT_MUL_LONG keeps + // both parts of the result. + +// The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG +// contains the hi and lo parts of three operand shift form where one op will be +// shifted into the other op as part of the operation (LSH_HI will shift +// the high bits of the lo operand into the high operand as it shifts left. RSH_LO +// will shift the lo bits of the high operand into the lo operand). LSH_HI +// represents the high operation of a 64-bit left shift by a constant int, and +// RSH_LO represents the lo operation of a 64-bit right shift by a constant int. 
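The decomposed-shift comment above can be read as plain 32-bit arithmetic. A minimal standalone C++ sketch (not part of this commit) of a 64-bit left shift by a constant n, with 0 < n < 32, split into lo/hi halves as the LSH_HI description implies; the matching GTNODE entries follow next. Function and parameter names are assumptions for illustration.

    #include <cstdint>

    // Illustrative only, valid for 0 < n < 32: the lo half is an ordinary 32-bit
    // shift, while the hi half also receives the bits shifted out of the lo half
    // (the LSH_HI behavior described above).
    void ShiftLeft64(uint32_t lo, uint32_t hi, unsigned n, uint32_t* loOut, uint32_t* hiOut)
    {
        *hiOut = (hi << n) | (lo >> (32 - n));
        *loOut = lo << n;
    }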
+GTNODE(LSH_HI , "<<Hi" ,GenTreeOp ,0,GTK_BINOP) +GTNODE(RSH_LO , ">>Lo" ,GenTreeOp ,0,GTK_BINOP) #endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) #ifdef FEATURE_SIMD -GTNODE(SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics +GTNODE(SIMD , "simd" ,GenTreeSIMD ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics #endif // FEATURE_SIMD //----------------------------------------------------------------------------- // Other nodes that look like unary/binary operators: //----------------------------------------------------------------------------- -GTNODE(JTRUE , "jmpTrue" ,0,GTK_UNOP|GTK_NOVALUE) +GTNODE(JTRUE , "jmpTrue" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) +GTNODE(JCC , "jcc" ,GenTreeJumpCC ,0,GTK_LEAF|GTK_NOVALUE) -GTNODE(LIST , "<list>" ,0,GTK_BINOP) +GTNODE(LIST , "<list>" ,GenTreeArgList ,0,GTK_BINOP|GTK_NOVALUE) +GTNODE(FIELD_LIST , "<fldList>" ,GenTreeFieldList ,0,GTK_BINOP) // List of fields of a struct, when passed as an argument //----------------------------------------------------------------------------- // Other nodes that have special structure: //----------------------------------------------------------------------------- -GTNODE(FIELD , "field" ,0,GTK_SPECIAL) // Member-field -GTNODE(ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address -GTNODE(ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element -GTNODE(ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element -GTNODE(CALL , "call()" ,0,GTK_SPECIAL) +GTNODE(FIELD , "field" ,GenTreeField ,0,GTK_SPECIAL) // Member-field +GTNODE(ARR_ELEM , "arrMD&" ,GenTreeArrElem ,0,GTK_SPECIAL) // Multi-dimensional array-element address +GTNODE(ARR_INDEX , "arrMDIdx" ,GenTreeArrIndex ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element +GTNODE(ARR_OFFSET , "arrMDOffs" ,GenTreeArrOffs ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element +GTNODE(CALL , "call()" ,GenTreeCall ,0,GTK_SPECIAL) //----------------------------------------------------------------------------- // Statement operator nodes: //----------------------------------------------------------------------------- -GTNODE(BEG_STMTS , "begStmts" ,0,GTK_SPECIAL|GTK_NOVALUE) // used only temporarily in importer by impBegin/EndTreeList() -GTNODE(STMT , "stmtExpr" ,0,GTK_SPECIAL|GTK_NOVALUE) // top-level list nodes in bbTreeList +GTNODE(BEG_STMTS , "begStmts" ,GenTree ,0,GTK_SPECIAL|GTK_NOVALUE)// used only temporarily in importer by impBegin/EndTreeList() +GTNODE(STMT , "stmtExpr" ,GenTreeStmt ,0,GTK_SPECIAL|GTK_NOVALUE)// top-level list nodes in bbTreeList -GTNODE(RETURN , "return" ,0,GTK_UNOP|GTK_NOVALUE) // return from current function -GTNODE(SWITCH , "switch" ,0,GTK_UNOP|GTK_NOVALUE) // switch +GTNODE(RETURN , "return" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // return from current function +GTNODE(SWITCH , "switch" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // switch -GTNODE(NO_OP , "no_op" ,0,GTK_LEAF|GTK_NOVALUE) // nop! +GTNODE(NO_OP , "no_op" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // nop! 
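For readers following the new column being threaded through every entry in this file: gtlist.h is consumed as an "x-macro" header. A consumer defines GTNODE to select the columns it needs and then includes the file, which undefines GTNODE again at its end (the #undef GTNODE visible further down). The third column added by this commit carries the GenTree struct flavor for each opcode, so such tables can now also be built per node struct. A hypothetical consumer, sketched for illustration only; the table name is an assumption, not code from this commit.

    // Illustrative only: build a table of printable node names from gtlist.h.
    #define GTNODE(en, sn, st, cm, ok) sn,
    static const char* const nodeNames[] = {
    #include "gtlist.h"
    };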
-GTNODE(START_NONGC, "start_nongc",0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible +GTNODE(START_NONGC , "start_nongc" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible -GTNODE(PROF_HOOK , "prof_hook" ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook +GTNODE(PROF_HOOK , "prof_hook" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook -GTNODE(RETFILT , "retfilt", 0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value +GTNODE(RETFILT , "retfilt" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value #if !FEATURE_EH_FUNCLETS -GTNODE(END_LFIN , "endLFin" ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally +GTNODE(END_LFIN , "endLFin" ,GenTreeVal ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally #endif // !FEATURE_EH_FUNCLETS //----------------------------------------------------------------------------- // Nodes used for optimizations. //----------------------------------------------------------------------------- -GTNODE(PHI , "phi" ,0,GTK_UNOP) // phi node for ssa. -GTNODE(PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg) +GTNODE(PHI , "phi" ,GenTreeOp ,0,GTK_UNOP) // phi node for ssa. +GTNODE(PHI_ARG , "phiArg" ,GenTreePhiArg ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg) //----------------------------------------------------------------------------- // Nodes used by Lower to generate a closer CPU representation of other nodes //----------------------------------------------------------------------------- -GTNODE(JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches -GTNODE(SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct +#ifndef LEGACY_BACKEND +GTNODE(JMPTABLE , "jumpTable" ,GenTreeJumpTable ,0, GTK_LEAF) // Generates the jump table for switches +#endif +GTNODE(SWITCH_TABLE , "tableSwitch" ,GenTreeOp ,0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct //----------------------------------------------------------------------------- // Nodes used only within the code generator: //----------------------------------------------------------------------------- -GTNODE(REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable -GTNODE(CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member -GTNODE(CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address -GTNODE(STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF|GTK_NOVALUE) // store to static data member -GTNODE(ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg -GTNODE(NULLCHECK , "nullcheck" ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source -GTNODE(PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register -GTNODE(PHYSREGDST , "physregDst" ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register -GTNODE(EMITNOP , "emitnop" ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop -GTNODE(PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq -GTNODE(PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq -GTNODE(PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register -GTNODE(PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack -GTNODE(RETURNTRAP , "returnTrap" ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc -GTNODE(SWAP , "swap" ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers) -GTNODE(IL_OFFSET , "il_offset" 
,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes +GTNODE(REG_VAR , "regVar" ,GenTreeLclVar ,0,GTK_LEAF|GTK_LOCAL) // register variable +GTNODE(CLS_VAR , "clsVar" ,GenTreeClsVar ,0,GTK_LEAF) // static data member +GTNODE(CLS_VAR_ADDR , "&clsVar" ,GenTreeClsVar ,0,GTK_LEAF) // static data member address +GTNODE(ARGPLACE , "argPlace" ,GenTreeArgPlace ,0,GTK_LEAF) // placeholder for a register arg +GTNODE(NULLCHECK , "nullcheck" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source +GTNODE(PHYSREG , "physregSrc" ,GenTreePhysReg ,0,GTK_LEAF) // read from a physical register +GTNODE(PHYSREGDST , "physregDst" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register +GTNODE(EMITNOP , "emitnop" ,GenTree ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop +GTNODE(PINVOKE_PROLOG ,"pinvoke_prolog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq +GTNODE(PINVOKE_EPILOG ,"pinvoke_epilog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq +GTNODE(PUTARG_REG , "putarg_reg" ,GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register +GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP) // operator that places outgoing arg in stack +GTNODE(RETURNTRAP , "returnTrap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc +GTNODE(SWAP , "swap" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers) +GTNODE(IL_OFFSET , "il_offset" ,GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes /*****************************************************************************/ #undef GTNODE diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h index 895d3b6598..ac912407be 100644 --- a/src/jit/gtstructs.h +++ b/src/jit/gtstructs.h @@ -65,7 +65,8 @@ GTSTRUCT_1(Cast , GT_CAST) GTSTRUCT_1(Box , GT_BOX) GTSTRUCT_1(Field , GT_FIELD) GTSTRUCT_1(Call , GT_CALL) -GTSTRUCT_1(ArgList , GT_LIST) +GTSTRUCT_2(ArgList , GT_LIST, GT_FIELD_LIST) +GTSTRUCT_1(FieldList , GT_FIELD_LIST) GTSTRUCT_1(Colon , GT_COLON) GTSTRUCT_1(FptrVal , GT_FTN_ADDR) GTSTRUCT_1(Intrinsic , GT_INTRINSIC) @@ -100,6 +101,7 @@ GTSTRUCT_1(PhysReg , GT_PHYSREG) GTSTRUCT_1(SIMD , GT_SIMD) #endif // FEATURE_SIMD GTSTRUCT_1(AllocObj , GT_ALLOCOBJ) +GTSTRUCT_1(JumpCC , GT_JCC) /*****************************************************************************/ #undef GTSTRUCT_0 #undef GTSTRUCT_1 diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index d04ded78fa..cb09ff8b8c 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -63,15 +63,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void Compiler::impInit() { -#ifdef DEBUG - impTreeList = impTreeLast = nullptr; -#endif -#if defined(DEBUG) +#ifdef DEBUG + impTreeList = nullptr; + impTreeLast = nullptr; impInlinedCodeSize = 0; #endif - - seenConditionalJump = false; } /***************************************************************************** @@ -600,13 +597,9 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel) // Assignment to (unaliased) locals don't count as a side-effect as // we handle them specially using impSpillLclRefs(). Temp locals should // be fine too. - // TODO-1stClassStructs: The check below should apply equally to struct assignments, - // but previously the block ops were always being marked GTF_GLOB_REF, even if - // the operands could not be global refs. 
if ((expr->gtOper == GT_ASG) && (expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) && - !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2) && - !varTypeIsStruct(expr->gtOp.gtOp1)) + !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2)) { unsigned op2Flags = expr->gtOp.gtOp2->gtFlags & GTF_GLOB_EFFECT; assert(flags == (op2Flags | GTF_ASG)); @@ -673,8 +666,6 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel) impMarkContiguousSIMDFieldAssignments(stmt); #endif -#ifdef DEBUGGING_SUPPORT - /* Once we set impCurStmtOffs in an appended tree, we are ready to report the following offsets. So reset impCurStmtOffs */ @@ -683,8 +674,6 @@ inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel) impCurStmtOffsSet(BAD_IL_OFFSET); } -#endif - #ifdef DEBUG if (impLastILoffsStmt == nullptr) { @@ -1143,9 +1132,13 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr, if (destAddr->OperGet() == GT_ADDR) { GenTree* destNode = destAddr->gtGetOp1(); - // If the actual destination is already a block node, or is a node that + // If the actual destination is a local (for non-LEGACY_BACKEND), or already a block node, or is a node that // will be morphed, don't insert an OBJ(ADDR). - if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk()) + if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk() +#ifndef LEGACY_BACKEND + || ((destNode->OperGet() == GT_LCL_VAR) && (destNode->TypeGet() == src->TypeGet())) +#endif // !LEGACY_BACKEND + ) { dest = destNode; } @@ -1194,6 +1187,9 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr, { // Mark the struct LclVar as used in a MultiReg return context // which currently makes it non promotable. + // TODO-1stClassStructs: Eliminate this pessimization when we can more generally + // handle multireg returns. + lcl->gtFlags |= GTF_DONT_CSE; lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true; } else // The call result is not a multireg return @@ -1208,12 +1204,20 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr, dest = lcl; #if defined(_TARGET_ARM_) + // TODO-Cleanup: This should have been taken care of in the above HasMultiRegRetVal() case, + // but that method has not been updadted to include ARM. impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd); + lcl->gtFlags |= GTF_DONT_CSE; #elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs. assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs."); // Make the struct non promotable. The eightbytes could contain multiple fields. + // TODO-1stClassStructs: Eliminate this pessimization when we can more generally + // handle multireg returns. + // TODO-Cleanup: Why is this needed here? This seems that it will set this even for + // non-multireg returns. + lcl->gtFlags |= GTF_DONT_CSE; lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true; #endif } @@ -1255,10 +1259,11 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr, src->gtType = genActualType(returnType); call->gtType = src->gtType; - // 1stClassStructToDo: We shouldn't necessarily need this. - if (dest != nullptr) + // If we've changed the type, and it no longer matches a local destination, + // we must use an indirection. 
+ if ((dest != nullptr) && (dest->OperGet() == GT_LCL_VAR) && (dest->TypeGet() != asgType)) { - dest = gtNewOperNode(GT_IND, returnType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest)); + dest = nullptr; } // !!! The destination could be on stack. !!! @@ -1329,21 +1334,19 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr, } else if (src->IsLocal()) { - // TODO-1stClassStructs: Eliminate this; it is only here to minimize diffs in the - // initial implementation. Previously the source would have been under a GT_ADDR, which - // would cause it to be marked GTF_DONT_CSE. asgType = src->TypeGet(); - src->gtFlags |= GTF_DONT_CSE; - if (asgType == TYP_STRUCT) - { - GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); - src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr); - } } else if (asgType == TYP_STRUCT) { asgType = impNormStructType(structHnd); src->gtType = asgType; +#ifdef LEGACY_BACKEND + if (asgType == TYP_STRUCT) + { + GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src); + src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr); + } +#endif } if (dest == nullptr) { @@ -1459,6 +1462,8 @@ GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal, // into which the gcLayout will be written. // pNumGCVars - (optional, default nullptr) - if non-null, a pointer to an unsigned, // which will be set to the number of GC fields in the struct. +// pSimdBaseType - (optional, default nullptr) - if non-null, and the struct is a SIMD +// type, set to the SIMD base type // // Return Value: // The JIT type for the struct (e.g. TYP_STRUCT, or TYP_SIMD*). @@ -1480,53 +1485,69 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, var_types* pSimdBaseType) { assert(structHnd != NO_CLASS_HANDLE); - unsigned originalSize = info.compCompHnd->getClassSize(structHnd); - unsigned numGCVars = 0; - var_types structType = TYP_STRUCT; - var_types simdBaseType = TYP_UNKNOWN; - bool definitelyHasGCPtrs = false; -#ifdef FEATURE_SIMD - // We don't want to consider this as a possible SIMD type if it has GC pointers. - // (Saves querying about the SIMD assembly.) - BYTE gcBytes[maxPossibleSIMDStructBytes / TARGET_POINTER_SIZE]; - if ((gcLayout == nullptr) && (originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes())) - { - gcLayout = gcBytes; - } -#endif // FEATURE_SIMD + const DWORD structFlags = info.compCompHnd->getClassAttribs(structHnd); + var_types structType = TYP_STRUCT; + +#ifdef FEATURE_CORECLR + const bool hasGCPtrs = (structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0; +#else + // Desktop CLR won't report FLG_CONTAINS_GC_PTR for RefAnyClass - need to check explicitly. + const bool isRefAny = (structHnd == impGetRefAnyClass()); + const bool hasGCPtrs = isRefAny || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0); +#endif - if (gcLayout != nullptr) - { - numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout); - definitelyHasGCPtrs = (numGCVars != 0); - } #ifdef FEATURE_SIMD // Check to see if this is a SIMD type. 
- if (featureSIMD && (originalSize <= getSIMDVectorRegisterByteLength()) && (originalSize >= TARGET_POINTER_SIZE) && - !definitelyHasGCPtrs) + if (featureSIMD && !hasGCPtrs) { - unsigned int sizeBytes; - simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes); - if (simdBaseType != TYP_UNKNOWN) + unsigned originalSize = info.compCompHnd->getClassSize(structHnd); + + if ((originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes())) { - assert(sizeBytes == originalSize); - structType = getSIMDTypeForSize(sizeBytes); - if (pSimdBaseType != nullptr) + unsigned int sizeBytes; + var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes); + if (simdBaseType != TYP_UNKNOWN) { - *pSimdBaseType = simdBaseType; - } + assert(sizeBytes == originalSize); + structType = getSIMDTypeForSize(sizeBytes); + if (pSimdBaseType != nullptr) + { + *pSimdBaseType = simdBaseType; + } #ifdef _TARGET_AMD64_ - // Amd64: also indicate that we use floating point registers - compFloatingPointUsed = true; + // Amd64: also indicate that we use floating point registers + compFloatingPointUsed = true; #endif + } } } #endif // FEATURE_SIMD - if (pNumGCVars != nullptr) + + // Fetch GC layout info if requested + if (gcLayout != nullptr) + { + unsigned numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout); + + // Verify that the quick test up above via the class attributes gave a + // safe view of the type's GCness. + // + // Note there are cases where hasGCPtrs is true but getClassGClayout + // does not report any gc fields. + assert(hasGCPtrs || (numGCVars == 0)); + + if (pNumGCVars != nullptr) + { + *pNumGCVars = numGCVars; + } + } + else { - *pNumGCVars = numGCVars; + // Can't safely ask for number of GC pointers without also + // asking for layout. + assert(pNumGCVars == nullptr); } + return structType; } @@ -1777,15 +1798,19 @@ GenTreePtr Compiler::impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup, unsigned handleFlags, void* compileTimeHandle) { - CORINFO_GENERIC_HANDLE handle = 0; - void* pIndirection = 0; + CORINFO_GENERIC_HANDLE handle = nullptr; + void* pIndirection = nullptr; assert(pLookup->accessType != IAT_PPVALUE); if (pLookup->accessType == IAT_VALUE) + { handle = pLookup->handle; + } else if (pLookup->accessType == IAT_PVALUE) + { pIndirection = pLookup->addr; - return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, 0, compileTimeHandle); + } + return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, nullptr, compileTimeHandle); } GenTreePtr Compiler::impReadyToRunHelperToTree( @@ -1798,7 +1823,9 @@ GenTreePtr Compiler::impReadyToRunHelperToTree( CORINFO_CONST_LOOKUP lookup; #if COR_JIT_EE_VERSION > 460 if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup)) - return NULL; + { + return nullptr; + } #else info.compCompHnd->getReadyToRunHelper(pResolvedToken, helper, &lookup); #endif @@ -1828,7 +1855,9 @@ GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CO *op1->gtFptrVal.gtLdftnResolvedToken = *pResolvedToken; } else + { op1->gtFptrVal.gtEntryPoint.addr = nullptr; + } #endif break; @@ -1852,6 +1881,46 @@ GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CO return op1; } +//------------------------------------------------------------------------ +// getRuntimeContextTree: find pointer to context for runtime lookup. +// +// Arguments: +// kind - lookup kind. +// +// Return Value: +// Return GenTree pointer to generic shared context. 
+// +// Notes: +// Reports about generic context using. + +GenTreePtr Compiler::getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind) +{ + GenTreePtr ctxTree = nullptr; + + // Collectible types requires that for shared generic code, if we use the generic context parameter + // that we report it. (This is a conservative approach, we could detect some cases particularly when the + // context parameter is this that we don't need the eager reporting logic.) + lvaGenericsContextUsed = true; + + if (kind == CORINFO_LOOKUP_THISOBJ) + { + // this Object + ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF); + + // Vtable pointer of this object + ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree); + ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception + ctxTree->gtFlags |= GTF_IND_INVARIANT; + } + else + { + assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM); + + ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg + } + return ctxTree; +} + /*****************************************************************************/ /* Import a dictionary lookup to access a handle in code shared between generic instantiations. @@ -1874,36 +1943,12 @@ GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedTok CORINFO_LOOKUP* pLookup, void* compileTimeHandle) { - CORINFO_RUNTIME_LOOKUP_KIND kind = pLookup->lookupKind.runtimeLookupKind; - CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; // This method can only be called from the importer instance of the Compiler. // In other word, it cannot be called by the instance of the Compiler for the inlinee. assert(!compIsForInlining()); - GenTreePtr ctxTree; - - // Collectible types requires that for shared generic code, if we use the generic context parameter - // that we report it. (This is a conservative approach, we could detect some cases particularly when the - // context parameter is this that we don't need the eager reporting logic.) 
- lvaGenericsContextUsed = true; - - if (kind == CORINFO_LOOKUP_THISOBJ) - { - // this Object - ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF); - - // Vtable pointer of this object - ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree); - ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception - ctxTree->gtFlags |= GTF_IND_INVARIANT; - } - else - { - assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM); - - ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg - } + GenTreePtr ctxTree = getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind); #ifdef FEATURE_READYTORUN_COMPILER if (opts.IsReadyToRun()) @@ -1913,6 +1958,7 @@ GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedTok } #endif + CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup; // It's available only via the run-time helper function if (pRuntimeLookup->indirections == CORINFO_USEHELPER) { @@ -2083,8 +2129,6 @@ bool Compiler::impSpillStackEntry(unsigned level, guard.Init(&impNestedStackSpill, bAssertOnRecursion); #endif - assert(!fgGlobalMorph); // use impInlineSpillStackEntry() during inlining - GenTreePtr tree = verCurrentState.esStack[level].val; /* Allocate a temp if we haven't been asked to use a particular one */ @@ -2179,8 +2223,6 @@ void Compiler::impSpillStackEnsure(bool spillLeaves) void Compiler::impSpillEvalStack() { - assert(!fgGlobalMorph); // use impInlineSpillEvalStack() during inlining - for (unsigned level = 0; level < verCurrentState.esStackDepth; level++) { impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillEvalStack")); @@ -2318,8 +2360,6 @@ Compiler::fgWalkResult Compiler::impFindValueClasses(GenTreePtr* pTree, fgWalkDa void Compiler::impSpillLclRefs(ssize_t lclNum) { - assert(!fgGlobalMorph); // use impInlineSpillLclRefs() during inlining - /* Before we make any appends to the tree list we must spill the * "special" side effects (GTF_ORDER_SIDEEFF) - GT_CATCH_ARG */ @@ -2676,7 +2716,6 @@ static inline bool impOpcodeIsCallOpcode(OPCODE opcode) } /*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode) { @@ -2695,8 +2734,6 @@ static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode) } } -#endif // DEBUGGING_SUPPORT - /*****************************************************************************/ // One might think it is worth caching these values, but results indicate @@ -2816,27 +2853,6 @@ GenTreePtr Compiler::impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp) return tree; } -/*****************************************************************************/ -BOOL Compiler::impLocAllocOnStack() -{ - if (!compLocallocUsed) - { - return (FALSE); - } - - // Returns true if a GT_LCLHEAP node is encountered in any of the trees - // that have been pushed on the importer evaluatuion stack. - // - for (unsigned i = 0; i < verCurrentState.esStackDepth; i++) - { - if (fgWalkTreePre(&verCurrentState.esStack[i].val, Compiler::fgChkLocAllocCB) == WALK_ABORT) - { - return (TRUE); - } - } - return (FALSE); -} - //------------------------------------------------------------------------ // impInitializeArrayIntrinsic: Attempts to replace a call to InitializeArray // with a GT_COPYBLK node. 
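The impInitializeArrayIntrinsic comment above describes replacing the InitializeArray helper call with a block copy. A minimal standalone C++ sketch (not part of this commit) of the net run-time effect: the array payload is filled directly from the compile-time initialization data instead of going through the runtime helper. The function and parameter names are assumptions for illustration.

    #include <cstring>
    #include <cstddef>

    // Illustrative only: what the emitted GT_COPYBLK amounts to at run time.
    void InitArrayFastPath(void* arrayPayload, const void* rvaInitData, size_t sizeInBytes)
    {
        std::memcpy(arrayPayload, rvaInitData, sizeInBytes);
    }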
@@ -3236,7 +3252,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, #if COR_JIT_EE_VERSION > 460 CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand); #else - CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method); + CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method); #endif *pIntrinsicID = intrinsicID; @@ -3307,9 +3323,9 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, op1 = nullptr; -#ifdef LEGACY_BACKEND +#if defined(LEGACY_BACKEND) if (IsTargetIntrinsic(intrinsicID)) -#else +#elif !defined(_TARGET_X86_) // Intrinsics that are not implemented directly by target instructions will // be re-materialized as users calls in rationalizer. For prefixed tail calls, // don't do this optimization, because @@ -3317,6 +3333,11 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, // b) It will be non-trivial task or too late to re-materialize a surviving // tail prefixed GT_INTRINSIC as tail call in rationalizer. if (!IsIntrinsicImplementedByUserCall(intrinsicID) || !tailCall) +#else + // On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation + // of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad + // code generation for certain EH constructs. + if (!IsIntrinsicImplementedByUserCall(intrinsicID)) #endif { switch (sig->numArgs) @@ -3534,7 +3555,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, // Get native TypeHandle argument to old helper op1 = op1->gtCall.gtCallArgs; - assert(op1->IsList()); + assert(op1->OperIsList()); assert(op1->gtOp.gtOp2 == nullptr); op1 = op1->gtOp.gtOp1; retNode = op1; @@ -3886,7 +3907,7 @@ void Compiler::verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logM #endif // DEBUG // Add the non verifiable flag to the compiler - if ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0) + if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY)) { tiIsVerifiableCode = FALSE; } @@ -4913,14 +4934,26 @@ GenTreePtr Compiler::impImportLdvirtftn(GenTreePtr thisPtr, } #ifdef FEATURE_READYTORUN_COMPILER - if (opts.IsReadyToRun() && !pCallInfo->exactContextNeedsRuntimeLookup) + if (opts.IsReadyToRun()) { - GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT, - gtNewArgList(thisPtr)); + if (!pCallInfo->exactContextNeedsRuntimeLookup) + { + GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT, + gtNewArgList(thisPtr)); - call->setEntryPoint(pCallInfo->codePointerLookup.constLookup); + call->setEntryPoint(pCallInfo->codePointerLookup.constLookup); - return call; + return call; + } + + // We need a runtime lookup. CoreRT has a ReadyToRun helper for that too. 
+ if (IsTargetAbi(CORINFO_CORERT_ABI)) + { + GenTreePtr ctxTree = getRuntimeContextTree(pCallInfo->codePointerLookup.lookupKind.runtimeLookupKind); + + return impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL, + gtNewArgList(ctxTree), &pCallInfo->codePointerLookup.lookupKind); + } } #endif @@ -5001,7 +5034,7 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) if (opts.IsReadyToRun()) { op1 = impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF); - usingReadyToRunHelper = (op1 != NULL); + usingReadyToRunHelper = (op1 != nullptr); } if (!usingReadyToRunHelper) @@ -5150,7 +5183,7 @@ void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI CLANG_FORMAT_COMMENT_ANCHOR; #if COR_JIT_EE_VERSION > 460 - if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI)) + if (!opts.IsReadyToRun() || IsTargetAbi(CORINFO_CORERT_ABI)) { LclVarDsc* newObjArrayArgsVar; @@ -5325,61 +5358,110 @@ GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr, } } -bool Compiler::impCanPInvokeInline(var_types callRetTyp) +//------------------------------------------------------------------------ +// impCanPInvokeInline: examine information from a call to see if the call +// qualifies as an inline pinvoke. +// +// Arguments: +// block - block contaning the call, or for inlinees, block +// containing the call being inlined +// +// Return Value: +// true if this call qualifies as an inline pinvoke, false otherwise +// +// Notes: +// Checks basic legality and then a number of ambient conditions +// where we could pinvoke but choose not to + +bool Compiler::impCanPInvokeInline(BasicBlock* block) { - return impCanPInvokeInlineCallSite(callRetTyp) && getInlinePInvokeEnabled() && (!opts.compDbgCode) && + return impCanPInvokeInlineCallSite(block) && getInlinePInvokeEnabled() && (!opts.compDbgCode) && (compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke ; } -// Returns false only if the callsite really cannot be inlined. Ignores global variables -// like debugger, profiler etc. -bool Compiler::impCanPInvokeInlineCallSite(var_types callRetTyp) +//------------------------------------------------------------------------ +// impCanPInvokeInlineSallSite: basic legality checks using information +// from a call to see if the call qualifies as an inline pinvoke. +// +// Arguments: +// block - block contaning the call, or for inlinees, block +// containing the call being inlined +// +// Return Value: +// true if this call can legally qualify as an inline pinvoke, false otherwise +// +// Notes: +// For runtimes that support exception handling interop there are +// restrictions on using inline pinvoke in handler regions. +// +// * We have to disable pinvoke inlining inside of filters because +// in case the main execution (i.e. in the try block) is inside +// unmanaged code, we cannot reuse the inlined stub (we still need +// the original state until we are in the catch handler) +// +// * We disable pinvoke inlining inside handlers since the GSCookie +// is in the inlined Frame (see +// CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie), but +// this would not protect framelets/return-address of handlers. +// +// These restrictions are currently also in place for CoreCLR but +// can be relaxed when coreclr/#8459 is addressed. 
+ +bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block) { - return - // We have to disable pinvoke inlining inside of filters - // because in case the main execution (i.e. in the try block) is inside - // unmanaged code, we cannot reuse the inlined stub (we still need the - // original state until we are in the catch handler) - (!bbInFilterILRange(compCurBB)) && - // We disable pinvoke inlining inside handlers since the GSCookie is - // in the inlined Frame (see CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie), - // but this would not protect framelets/return-address of handlers. - !compCurBB->hasHndIndex() && #ifdef _TARGET_AMD64_ - // Turns out JIT64 doesn't perform PInvoke inlining inside try regions, here's an excerpt of - // the comment from JIT64 explaining why: - // - //// [VSWhidbey: 611015] - because the jitted code links in the Frame (instead - //// of the stub) we rely on the Frame not being 'active' until inside the - //// stub. This normally happens by the stub setting the return address - //// pointer in the Frame object inside the stub. On a normal return, the - //// return address pointer is zeroed out so the Frame can be safely re-used, - //// but if an exception occurs, nobody zeros out the return address pointer. - //// Thus if we re-used the Frame object, it would go 'active' as soon as we - //// link it into the Frame chain. - //// - //// Technically we only need to disable PInvoke inlining if we're in a - //// handler or if we're - //// in a try body with a catch or filter/except where other non-handler code - //// in this method might run and try to re-use the dirty Frame object. - // - // Now, because of this, the VM actually assumes that in 64 bit we never PInvoke - // inline calls on any EH construct, you can verify that on VM\ExceptionHandling.cpp:203 - // The method responsible for resuming execution is UpdateObjectRefInResumeContextCallback - // you can see how it aligns with JIT64 policy of not inlining PInvoke calls almost right - // at the beginning of the body of the method. - !compCurBB->hasTryIndex() && -#endif - (!impLocAllocOnStack()) && (callRetTyp != TYP_STRUCT); + // On x64, we disable pinvoke inlining inside of try regions. + // Here is the comment from JIT64 explaining why: + // + // [VSWhidbey: 611015] - because the jitted code links in the + // Frame (instead of the stub) we rely on the Frame not being + // 'active' until inside the stub. This normally happens by the + // stub setting the return address pointer in the Frame object + // inside the stub. On a normal return, the return address + // pointer is zeroed out so the Frame can be safely re-used, but + // if an exception occurs, nobody zeros out the return address + // pointer. Thus if we re-used the Frame object, it would go + // 'active' as soon as we link it into the Frame chain. + // + // Technically we only need to disable PInvoke inlining if we're + // in a handler or if we're in a try body with a catch or + // filter/except where other non-handler code in this method + // might run and try to re-use the dirty Frame object. 
+ // + // A desktop test case where this seems to matter is + // jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe + const bool inX64Try = block->hasTryIndex(); +#else + const bool inX64Try = false; +#endif // _TARGET_AMD64_ + + return !inX64Try && !block->hasHndIndex(); } -void Compiler::impCheckForPInvokeCall(GenTreePtr call, - CORINFO_METHOD_HANDLE methHnd, - CORINFO_SIG_INFO* sig, - unsigned mflags) +//------------------------------------------------------------------------ +// impCheckForPInvokeCall examine call to see if it is a pinvoke and if so +// if it can be expressed as an inline pinvoke. +// +// Arguments: +// call - tree for the call +// methHnd - handle for the method being called (may be null) +// sig - signature of the method being called +// mflags - method flags for the method being called +// block - block contaning the call, or for inlinees, block +// containing the call being inlined +// +// Notes: +// Sets GTF_CALL_M_PINVOKE on the call for pinvokes. +// +// Also sets GTF_CALL_UNMANAGED on call for inline pinvokes if the +// call passes a combination of legality and profitabilty checks. +// +// If GTF_CALL_UNMANAGED is set, increments info.compCallUnmanaged + +void Compiler::impCheckForPInvokeCall( + GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block) { - var_types callRetTyp = JITtype2varType(sig->retType); CorInfoUnmanagedCallConv unmanagedCallConv; // If VM flagged it as Pinvoke, flag the call node accordingly @@ -5422,15 +5504,12 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call, if (opts.compMustInlinePInvokeCalli && methHnd == nullptr) { -#ifdef _TARGET_X86_ - // CALLI in IL stubs must be inlined - assert(impCanPInvokeInlineCallSite(callRetTyp)); - assert(!info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig)); -#endif // _TARGET_X86_ + // Always inline pinvoke. } else { - if (!impCanPInvokeInline(callRetTyp)) + // Check legality and profitability. + if (!impCanPInvokeInline(block)) { return; } @@ -5439,6 +5518,14 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call, { return; } + + // Size-speed tradeoff: don't use inline pinvoke at rarely + // executed call sites. The non-inline version is more + // compact. 
+ if (block->isRunRarely()) + { + return; + } } JITLOG((LL_INFO1000000, "\nInline a CALLI PINVOKE call from method %s", info.compFullName)); @@ -5446,8 +5533,6 @@ void Compiler::impCheckForPInvokeCall(GenTreePtr call, call->gtFlags |= GTF_CALL_UNMANAGED; info.compCallUnmanaged++; - assert(!compIsForInlining()); - // AMD64 convention is same for native and managed if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_C) { @@ -5736,6 +5821,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve break; case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: + { #ifdef FEATURE_READYTORUN_COMPILER if (opts.IsReadyToRun()) { @@ -5762,8 +5848,39 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, pFieldInfo->offset, fs)); } break; + } +#if COR_JIT_EE_VERSION > 460 + case CORINFO_FIELD_STATIC_READYTORUN_HELPER: + { +#ifdef FEATURE_READYTORUN_COMPILER + noway_assert(opts.IsReadyToRun()); + CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd); + assert(kind.needsRuntimeLookup); + + GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); + GenTreeArgList* args = gtNewArgList(ctxTree); + + unsigned callFlags = 0; + + if (info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_BEFOREFIELDINIT) + { + callFlags |= GTF_CALL_HOISTABLE; + } + var_types type = TYP_BYREF; + op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, type, callFlags, args); + op1->gtCall.setEntryPoint(pFieldInfo->fieldLookup); + FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField); + op1 = gtNewOperNode(GT_ADD, type, op1, + new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, pFieldInfo->offset, fs)); +#else + unreached(); +#endif // FEATURE_READYTORUN_COMPILER + } + break; +#endif // COR_JIT_EE_VERSION > 460 default: + { if (!(access & CORINFO_ACCESS_ADDRESS)) { // In future, it may be better to just create the right tree here instead of folding it later. @@ -5820,6 +5937,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve } } break; + } } if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP) @@ -6071,7 +6189,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed, ((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly // one pop seen so far. #else - nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode); + nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode); #endif if (isCallPopAndRet) @@ -6845,9 +6963,15 @@ var_types Compiler::impImportCall(OPCODE opcode, //--------------------------- Inline NDirect ------------------------------ - if (!compIsForInlining()) + // For inline cases we technically should look at both the current + // block and the call site block (or just the latter if we've + // fused the EH trees). However the block-related checks pertain to + // EH and we currently won't inline a method with EH. So for + // inlinees, just checking the call site block is sufficient. { - impCheckForPInvokeCall(call, methHnd, sig, mflags); + // New lexical block here to avoid compilation errors because of GOTOs. + BasicBlock* block = compIsForInlining() ? 
impInlineInfo->iciBlock : compCurBB; + impCheckForPInvokeCall(call, methHnd, sig, mflags, block); } if (call->gtFlags & GTF_CALL_UNMANAGED) @@ -7035,7 +7159,7 @@ var_types Compiler::impImportCall(OPCODE opcode, { instParam = impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_CLASS_HDL, exactClassHandle); - if (instParam == NULL) + if (instParam == nullptr) { return callRetTyp; } @@ -7452,10 +7576,6 @@ DONE_CALL: { call = impFixupCallStructReturn(call, sig->retTypeClass); } - else if (varTypeIsLong(callRetTyp)) - { - call = impInitCallLongReturn(call); - } if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) { @@ -7467,6 +7587,13 @@ DONE_CALL: // TODO: Still using the widened type. call = gtNewInlineCandidateReturnExpr(call, genActualType(callRetTyp)); } + else + { + // For non-candidates we must also spill, since we + // might have locals live on the eval stack that this + // call can modify. + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("non-inline candidate call")); + } } if (!bIntrinsicImported) @@ -7738,42 +7865,6 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN return call; } -//------------------------------------------------------------------------------------- -// impInitCallLongReturn: -// Initialize the ReturnTypDesc for a call that returns a TYP_LONG -// -// Arguments: -// call - GT_CALL GenTree node -// -// Return Value: -// Returns new GenTree node after initializing the ReturnTypeDesc of call node -// -GenTreePtr Compiler::impInitCallLongReturn(GenTreePtr call) -{ - assert(call->gtOper == GT_CALL); - -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - // LEGACY_BACKEND does not use multi reg returns for calls with long return types - - if (varTypeIsLong(call)) - { - GenTreeCall* callNode = call->AsCall(); - - // The return type will remain as the incoming long type - callNode->gtReturnType = call->gtType; - - // Initialize Return type descriptor of call node - ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc(); - retTypeDesc->InitializeLongReturnType(this); - - // must be a long returned in two registers - assert(retTypeDesc->GetReturnRegCount() == 2); - } -#endif // _TARGET_X86_ && !LEGACY_BACKEND - - return call; -} - /***************************************************************************** For struct return values, re-type the operand in the case where the ABI does not use a struct return buffer @@ -7804,6 +7895,9 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL unsigned lclNum = op->gtLclVarCommon.gtLclNum; lvaTable[lclNum].lvIsMultiRegRet = true; + // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. + op->gtFlags |= GTF_DONT_CSE; + return op; } @@ -7828,6 +7922,10 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL unsigned lclNum = op->gtLclVarCommon.gtLclNum; // Make sure this struct type stays as struct so that we can return it as an HFA lvaTable[lclNum].lvIsMultiRegRet = true; + + // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. + op->gtFlags |= GTF_DONT_CSE; + return op; } @@ -7860,6 +7958,10 @@ GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDL // Make sure this struct type is not struct promoted lvaTable[lclNum].lvIsMultiRegRet = true; + + // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. 
+ op->gtFlags |= GTF_DONT_CSE; + return op; } @@ -9311,8 +9413,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode); -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) - #ifndef DEBUG if (opts.compDbgInfo) #endif @@ -9424,8 +9524,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) } } -#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG) - CORINFO_CLASS_HANDLE clsHnd = DUMMY_INIT(NULL); CORINFO_CLASS_HANDLE ldelemClsHnd = DUMMY_INIT(NULL); CORINFO_CLASS_HANDLE stelemClsHnd = DUMMY_INIT(NULL); @@ -9515,6 +9613,14 @@ void Compiler::impImportBlockCode(BasicBlock* block) SPILL_APPEND: + // We need to call impSpillLclRefs() for a struct type lclVar. + // This is done for non-block assignments in the handling of stloc. + if ((op1->OperGet() == GT_ASG) && varTypeIsStruct(op1->gtOp.gtOp1) && + (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR)) + { + impSpillLclRefs(op1->gtOp.gtOp1->AsLclVarCommon()->gtLclNum); + } + /* Append 'op1' to the list of statements */ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs); goto DONE_APPEND; @@ -11087,8 +11193,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) COND_JUMP: - seenConditionalJump = true; - /* Fold comparison if we can */ op1 = gtFoldExpr(op1); @@ -12328,14 +12432,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) // At present this can only be String else if (clsFlags & CORINFO_FLG_VAROBJSIZE) { -#if COR_JIT_EE_VERSION > 460 - if (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI) + if (IsTargetAbi(CORINFO_CORERT_ABI)) { // The dummy argument does not exist in CoreRT newObjThisPtr = nullptr; } else -#endif { // This is the case for variable-sized objects that are not // arrays. In this case, call the constructor with a null 'this' @@ -12368,6 +12470,33 @@ void Compiler::impImportBlockCode(BasicBlock* block) // The lookup of the code pointer will be handled by CALL in this case if (clsFlags & CORINFO_FLG_VALUECLASS) { + if (compIsForInlining()) + { + // If value class has GC fields, inform the inliner. It may choose to + // bail out on the inline. + DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass); + if ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0) + { + compInlineResult->Note(InlineObservation::CALLEE_HAS_GC_STRUCT); + if (compInlineResult->IsFailure()) + { + return; + } + + // Do further notification in the case where the call site is rare; + // some policies do not track the relative hotness of call sites for + // "always" inline cases. + if (impInlineInfo->iciBlock->isRunRarely()) + { + compInlineResult->Note(InlineObservation::CALLSITE_RARE_GC_STRUCT); + if (compInlineResult->IsFailure()) + { + return; + } + } + } + } + CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass); unsigned size = info.compCompHnd->getClassSize(resolvedToken.hClass); @@ -12403,7 +12532,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (opts.IsReadyToRun()) { op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF); - usingReadyToRunHelper = (op1 != NULL); + usingReadyToRunHelper = (op1 != nullptr); } if (!usingReadyToRunHelper) @@ -12503,6 +12632,10 @@ void Compiler::impImportBlockCode(BasicBlock* block) if (compIsForInlining()) { + if (compDonotInline()) + { + return; + } // We rule out inlinees with explicit tail calls in fgMakeBasicBlocks. 
assert((prefixFlags & PREFIX_TAILCALL_EXPLICIT) == 0); } @@ -12696,7 +12829,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) return; case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER: - +#if COR_JIT_EE_VERSION > 460 + case CORINFO_FIELD_STATIC_READYTORUN_HELPER: +#endif /* We may be able to inline the field accessors in specific instantiations of generic * methods */ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDFLD_NEEDS_HELPER); @@ -12828,7 +12963,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef FEATURE_READYTORUN_COMPILER if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE) + { op1->gtField.gtFieldLookup = fieldInfo.fieldLookup; + } #endif op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT); @@ -12925,6 +13062,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) case CORINFO_FIELD_STATIC_RVA_ADDRESS: case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER: +#if COR_JIT_EE_VERSION > 460 + case CORINFO_FIELD_STATIC_READYTORUN_HELPER: +#endif op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp); break; @@ -13068,6 +13208,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) return; case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER: +#if COR_JIT_EE_VERSION > 460 + case CORINFO_FIELD_STATIC_READYTORUN_HELPER: +#endif /* We may be able to inline the field accessors in specific instantiations of generic * methods */ @@ -13134,7 +13277,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifdef FEATURE_READYTORUN_COMPILER if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE) + { op1->gtField.gtFieldLookup = fieldInfo.fieldLookup; + } #endif op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT); @@ -13185,6 +13330,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) case CORINFO_FIELD_STATIC_RVA_ADDRESS: case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER: case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER: +#if COR_JIT_EE_VERSION > 460 + case CORINFO_FIELD_STATIC_READYTORUN_HELPER: +#endif op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp); break; @@ -13376,7 +13524,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEWARR_1, TYP_REF, gtNewArgList(op2)); - usingReadyToRunHelper = (op1 != NULL); + usingReadyToRunHelper = (op1 != nullptr); if (!usingReadyToRunHelper) { @@ -13388,9 +13536,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // Reason: performance (today, we'll always use the slow helper for the R2R generics case) // Need to restore array classes before creating array objects on the heap - op1 = impTokenToHandle(&resolvedToken, NULL, TRUE /*mustRestoreHandle*/); - if (op1 == NULL) // compDonotInline() + op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE /*mustRestoreHandle*/); + if (op1 == nullptr) + { // compDonotInline() return; + } } } @@ -13498,7 +13648,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF, gtNewArgList(op1)); - usingReadyToRunHelper = (opLookup != NULL); + usingReadyToRunHelper = (opLookup != nullptr); op1 = (usingReadyToRunHelper ? 
opLookup : op1); if (!usingReadyToRunHelper) @@ -13510,9 +13660,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // 3) Perform the 'is instance' check on the input object // Reason: performance (today, we'll always use the slow helper for the R2R generics case) - op2 = impTokenToHandle(&resolvedToken, NULL, FALSE); - if (op2 == NULL) // compDonotInline() + op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE); + if (op2 == nullptr) + { // compDonotInline() return; + } } } @@ -14026,7 +14178,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) { GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST, TYP_REF, gtNewArgList(op1)); - usingReadyToRunHelper = (opLookup != NULL); + usingReadyToRunHelper = (opLookup != nullptr); op1 = (usingReadyToRunHelper ? opLookup : op1); if (!usingReadyToRunHelper) @@ -14038,9 +14190,11 @@ void Compiler::impImportBlockCode(BasicBlock* block) // 3) Check the object on the stack for the type-cast // Reason: performance (today, we'll always use the slow helper for the R2R generics case) - op2 = impTokenToHandle(&resolvedToken, NULL, FALSE); - if (op2 == NULL) // compDonotInline() + op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE); + if (op2 == nullptr) + { // compDonotInline() return; + } } } @@ -14075,20 +14229,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) compInlineResult->NoteFatal(InlineObservation::CALLEE_THROW_WITH_INVALID_STACK); return; } - - /* Don't inline non-void conditionals that have a throw in one of the branches */ - - /* NOTE: If we do allow this, note that we can't simply do a - checkLiveness() to match the liveness at the end of the "then" - and "else" branches of the GT_COLON. The branch with the throw - will keep nothing live, so we should use the liveness at the - end of the non-throw branch. */ - - if (seenConditionalJump && (impInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID)) - { - compInlineResult->NoteFatal(InlineObservation::CALLSITE_CONDITIONAL_THROW); - return; - } } if (tiVerificationNeeded) @@ -14714,6 +14854,10 @@ GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HAN unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return.")); impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE); GenTreePtr ret = gtNewLclvNode(tmpNum, op->gtType); + + // TODO-1stClassStructs: Handle constant propagation and CSE-ing of multireg returns. + ret->gtFlags |= GTF_DONT_CSE; + assert(IsMultiRegReturnedType(hClass)); // Mark the var so that fields are not promoted and stay together. @@ -14852,7 +14996,8 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM) { - assert(info.compRetNativeType != TYP_VOID && fgMoreThanOneReturnBlock()); + assert(info.compRetNativeType != TYP_VOID && + (fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals)); // This is a bit of a workaround... // If we are inlining a call that returns a struct, where the actual "native" return type is @@ -14943,7 +15088,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& // in this case we have to insert multiple struct copies to the temp // and the retexpr is just the temp. 
assert(info.compRetNativeType != TYP_VOID); - assert(fgMoreThanOneReturnBlock()); + assert(fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals); impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(), (unsigned)CHECK_SPILL_ALL); @@ -16469,7 +16614,7 @@ void Compiler::impImport(BasicBlock* method) // coupled with the JIT64 IL Verification logic. Look inside verHandleVerificationFailure // method for further explanation on why we raise this exception instead of making the jitted // code throw the verification exception during execution. - if (tiVerificationNeeded && (opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0) + if (tiVerificationNeeded && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IMPORT_ONLY)) { BADCODE("Basic block marked as not verifiable"); } @@ -16989,18 +17134,10 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, #endif // FEATURE_SIMD } - if (curArgVal->gtFlags & GTF_ORDER_SIDEEFF) - { - // Right now impInlineSpillLclRefs and impInlineSpillGlobEffects don't take - // into account special side effects, so we disallow them during inlining. - inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_SIDE_EFFECT); - return; - } - - if (curArgVal->gtFlags & GTF_GLOB_EFFECT) + if (curArgVal->gtFlags & GTF_ALL_EFFECT) { inlCurArgInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0; - inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & GTF_SIDE_EFFECT) != 0; + inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & (GTF_ALL_EFFECT & ~GTF_GLOB_REF)) != 0; } if (curArgVal->gtOper == GT_LCL_VAR) @@ -17251,6 +17388,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) var_types sigType = (var_types)eeGetArgType(argLst, &methInfo->args); lclVarInfo[i].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->args, argLst); + #ifdef FEATURE_SIMD if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo))) { @@ -17377,16 +17515,49 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) var_types type = (var_types)eeGetArgType(localsSig, &methInfo->locals, &isPinned); lclVarInfo[i + argCnt].lclHasLdlocaOp = false; + lclVarInfo[i + argCnt].lclIsPinned = isPinned; lclVarInfo[i + argCnt].lclTypeInfo = type; if (isPinned) { - inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_PINNED_LOCALS); - return; + // Pinned locals may cause inlines to fail. + inlineResult->Note(InlineObservation::CALLEE_HAS_PINNED_LOCALS); + if (inlineResult->IsFailure()) + { + return; + } } lclVarInfo[i + argCnt].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->locals, localsSig); + // If this local is a struct type with GC fields, inform the inliner. It may choose to bail + // out on the inline. + if (type == TYP_STRUCT) + { + CORINFO_CLASS_HANDLE lclHandle = lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle(); + DWORD typeFlags = info.compCompHnd->getClassAttribs(lclHandle); + if ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0) + { + inlineResult->Note(InlineObservation::CALLEE_HAS_GC_STRUCT); + if (inlineResult->IsFailure()) + { + return; + } + + // Do further notification in the case where the call site is rare; some policies do + // not track the relative hotness of call sites for "always" inline cases. 
+ if (pInlineInfo->iciBlock->isRunRarely()) + { + inlineResult->Note(InlineObservation::CALLSITE_RARE_GC_STRUCT); + if (inlineResult->IsFailure()) + { + + return; + } + } + } + } + localsSig = info.compCompHnd->getArgNext(localsSig); #ifdef FEATURE_SIMD @@ -17431,6 +17602,28 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reas lvaTable[tmpNum].lvHasLdAddrOp = 1; } + if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclIsPinned) + { + lvaTable[tmpNum].lvPinned = 1; + + if (!impInlineInfo->hasPinnedLocals) + { + // If the inlinee returns a value, use a spill temp + // for the return value to ensure that even in case + // where the return expression refers to one of the + // pinned locals, we can unpin the local right after + // the inlined method body. + if ((info.compRetNativeType != TYP_VOID) && (lvaInlineeReturnSpillTemp == BAD_VAR_NUM)) + { + lvaInlineeReturnSpillTemp = + lvaGrabTemp(false DEBUGARG("Inline candidate pinned local return spill temp")); + lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType; + } + } + + impInlineInfo->hasPinnedLocals = true; + } + if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct()) { if (varTypeIsStruct(lclTyp)) @@ -17895,10 +18088,17 @@ void Compiler::impMarkInlineCandidate(GenTreePtr callNode, bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) { -#if defined(_TARGET_AMD64_) +#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)) switch (intrinsicId) { // Amd64 only has SSE2 instruction to directly compute sqrt/abs. + // + // TODO: Because the x86 backend only targets SSE for floating-point code, + // it does not treat Sine, Cosine, or Round as intrinsics (JIT32 + // implemented those intrinsics as x87 instructions). If this poses + // a CQ problem, it may be necessary to change the implementation of + // the helper calls to decrease call overhead or switch back to the + // x87 instructions. This is tracked by #7097. 
case CORINFO_INTRINSIC_Sqrt: case CORINFO_INTRINSIC_Abs: return true; diff --git a/src/jit/inline.cpp b/src/jit/inline.cpp index deccc0e84b..05fcf1c6b9 100644 --- a/src/jit/inline.cpp +++ b/src/jit/inline.cpp @@ -447,7 +447,7 @@ void InlineContext::DumpData(unsigned indent) else if (m_Success) { const char* inlineReason = InlGetObservationString(m_Observation); - printf("%*s%u,\"%s\",\"%s\"", indent, "", m_Ordinal, inlineReason, calleeName); + printf("%*s%u,\"%s\",\"%s\",", indent, "", m_Ordinal, inlineReason, calleeName); m_Policy->DumpData(jitstdout); printf("\n"); } @@ -500,14 +500,25 @@ void InlineContext::DumpXml(FILE* file, unsigned indent) fprintf(file, "%*s<Offset>%u</Offset>\n", indent + 2, "", offset); fprintf(file, "%*s<Reason>%s</Reason>\n", indent + 2, "", inlineReason); - // Optionally, dump data about the last inline - if ((JitConfig.JitInlineDumpData() != 0) && (this == m_InlineStrategy->GetLastContext())) + // Optionally, dump data about the inline + const int dumpDataSetting = JitConfig.JitInlineDumpData(); + + // JitInlineDumpData=1 -- dump data plus deltas for last inline only + if ((dumpDataSetting == 1) && (this == m_InlineStrategy->GetLastContext())) { fprintf(file, "%*s<Data>", indent + 2, ""); m_InlineStrategy->DumpDataContents(file); fprintf(file, "</Data>\n"); } + // JitInlineDumpData=2 -- dump data for all inlines, no deltas + if ((dumpDataSetting == 2) && (m_Policy != nullptr)) + { + fprintf(file, "%*s<Data>", indent + 2, ""); + m_Policy->DumpData(file); + fprintf(file, "</Data>\n"); + } + newIndent = indent + 2; } @@ -646,10 +657,11 @@ void InlineResult::Report() m_Reported = true; #ifdef DEBUG - const char* callee = nullptr; + const char* callee = nullptr; + const bool showInlines = (JitConfig.JitPrintInlinedMethods() == 1); // Optionally dump the result - if (VERBOSE) + if (VERBOSE || showInlines) { const char* format = "INLINER: during '%s' result '%s' reason '%s' for '%s' calling '%s'\n"; const char* caller = (m_Caller == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Caller); @@ -689,12 +701,18 @@ void InlineResult::Report() #ifdef DEBUG + const char* obsString = InlGetObservationString(obs); + if (VERBOSE) { - const char* obsString = InlGetObservationString(obs); JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, obsString); } + if (showInlines) + { + printf("Marking %s as NOINLINE because of %s\n", callee, obsString); + } + #endif // DEBUG COMP_HANDLE comp = m_RootCompiler->info.compCompHnd; @@ -740,6 +758,7 @@ InlineStrategy::InlineStrategy(Compiler* compiler) , m_HasForceViaDiscretionary(false) #if defined(DEBUG) || defined(INLINE_DATA) , m_MethodXmlFilePosition(0) + , m_Random(nullptr) #endif // defined(DEBUG) || defined(INLINE_DATA) { @@ -1155,10 +1174,10 @@ InlineContext* InlineStrategy::NewRoot() InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo) { InlineContext* calleeContext = new (m_Compiler, CMK_Inlining) InlineContext(this); - GenTree* stmt = inlineInfo->iciStmt; + GenTreeStmt* stmt = inlineInfo->iciStmt; BYTE* calleeIL = inlineInfo->inlineCandidateInfo->methInfo.ILCode; unsigned calleeILSize = inlineInfo->inlineCandidateInfo->methInfo.ILCodeSize; - InlineContext* parentContext = stmt->gtStmt.gtInlineContext; + InlineContext* parentContext = stmt->gtInlineContext; noway_assert(parentContext != nullptr); @@ -1213,35 +1232,22 @@ InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo) // A new InlineContext for diagnostic purposes, or nullptr if // the desired context could not be created. 
-InlineContext* InlineStrategy::NewFailure(GenTree* stmt, InlineResult* inlineResult) +InlineContext* InlineStrategy::NewFailure(GenTreeStmt* stmt, InlineResult* inlineResult) { - // Check for a parent context first. We may insert new statements - // between the caller and callee that do not pick up either's - // context, and these statements may have calls that we later - // examine and fail to inline. - // - // See fgInlinePrependStatements for examples. - - InlineContext* parentContext = stmt->gtStmt.gtInlineContext; - - if (parentContext == nullptr) - { - // Assume for now this is a failure to inline a call in a - // statement inserted between caller and callee. Just ignore - // it for the time being. - - return nullptr; - } - + // Check for a parent context first. We should now have a parent + // context for all statements. + InlineContext* parentContext = stmt->gtInlineContext; + assert(parentContext != nullptr); InlineContext* failedContext = new (m_Compiler, CMK_Inlining) InlineContext(this); - failedContext->m_Parent = parentContext; - // Push on front here will put siblings in reverse lexical - // order which we undo in the dumper + // Pushing the new context on the front of the parent child list + // will put siblings in reverse lexical order which we undo in the + // dumper. + failedContext->m_Parent = parentContext; failedContext->m_Sibling = parentContext->m_Child; parentContext->m_Child = failedContext; failedContext->m_Child = nullptr; - failedContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx; + failedContext->m_Offset = stmt->gtStmtILoffsx; failedContext->m_Observation = inlineResult->GetObservation(); failedContext->m_Callee = inlineResult->GetCallee(); failedContext->m_Success = false; @@ -1354,7 +1360,7 @@ void InlineStrategy::DumpDataEnsurePolicyIsSet() // successful policy, so fake one up. if (m_LastSuccessfulPolicy == nullptr) { - const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0; + const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); m_LastSuccessfulPolicy = InlinePolicy::GetPolicy(m_Compiler, isPrejitRoot); // Add in a bit of data.... 
@@ -1388,7 +1394,7 @@ void InlineStrategy::DumpDataHeader(FILE* file) void InlineStrategy::DumpDataSchema(FILE* file) { DumpDataEnsurePolicyIsSet(); - fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate"); + fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate,"); m_LastSuccessfulPolicy->DumpSchema(file); } @@ -1424,7 +1430,7 @@ void InlineStrategy::DumpDataContents(FILE* file) microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000); } - fprintf(file, "%08X,%u,%u,%u,%u,%d,%d", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize, + fprintf(file, "%08X,%u,%u,%u,%u,%d,%d,", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize, info.compTotalColdCodeSize, microsecondsSpentJitting, m_CurrentSizeEstimate / 10, m_CurrentTimeEstimate); m_LastSuccessfulPolicy->DumpData(file); } @@ -1461,10 +1467,22 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent) fprintf(file, "<InlineForest>\n"); fprintf(file, "<Policy>%s</Policy>\n", m_LastSuccessfulPolicy->GetName()); - if (JitConfig.JitInlineDumpData() != 0) + const int dumpDataSetting = JitConfig.JitInlineDumpData(); + if (dumpDataSetting != 0) { fprintf(file, "<DataSchema>"); - DumpDataSchema(file); + + if (dumpDataSetting == 1) + { + // JitInlineDumpData=1 -- dump schema for data plus deltas + DumpDataSchema(file); + } + else if (dumpDataSetting == 2) + { + // JitInlineDumpData=2 -- dump schema for data only + m_LastSuccessfulPolicy->DumpSchema(file); + } + fprintf(file, "</DataSchema>\n"); } @@ -1484,7 +1502,7 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent) const Compiler::Info& info = m_Compiler->info; const Compiler::Options& opts = m_Compiler->opts; - const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0; + const bool isPrejitRoot = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; // We'd really like the method identifier to be unique and @@ -1589,6 +1607,52 @@ void InlineStrategy::FinalizeXml(FILE* file) ReplayPolicy::FinalizeXml(); } +//------------------------------------------------------------------------ +// GetRandom: setup or access random state +// +// Return Value: +// New or pre-existing random state. +// +// Notes: +// Random state is kept per jit compilation request. Seed is partially +// specified externally (via stress or policy setting) and partially +// specified internally via method hash. 
+ +CLRRandom* InlineStrategy::GetRandom() +{ + if (m_Random == nullptr) + { + int externalSeed = 0; + +#ifdef DEBUG + + if (m_Compiler->compRandomInlineStress()) + { + externalSeed = getJitStressLevel(); + } + +#endif // DEBUG + + int randomPolicyFlag = JitConfig.JitInlinePolicyRandom(); + if (randomPolicyFlag != 0) + { + externalSeed = randomPolicyFlag; + } + + int internalSeed = m_Compiler->info.compMethodHash(); + + assert(externalSeed != 0); + assert(internalSeed != 0); + + int seed = externalSeed ^ internalSeed; + + m_Random = new (m_Compiler, CMK_Inlining) CLRRandom(); + m_Random->Init(seed); + } + + return m_Random; +} + #endif // defined(DEBUG) || defined(INLINE_DATA) //------------------------------------------------------------------------ diff --git a/src/jit/inline.def b/src/jit/inline.def index 2c933fb8a9..ff0b21100e 100644 --- a/src/jit/inline.def +++ b/src/jit/inline.def @@ -40,7 +40,6 @@ INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE) INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE) INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE) -INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", FATAL, CALLEE) INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE) INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE) INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoinline", FATAL, CALLEE) @@ -78,6 +77,8 @@ INLINE_OBSERVATION(BELOW_ALWAYS_INLINE_SIZE, bool, "below ALWAYS_INLINE size" INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class", INFORMATION, CALLEE) INLINE_OBSERVATION(DOES_NOT_RETURN, bool, "does not return", INFORMATION, CALLEE) INLINE_OBSERVATION(END_OPCODE_SCAN, bool, "done looking at opcodes", INFORMATION, CALLEE) +INLINE_OBSERVATION(HAS_GC_STRUCT, bool, "has gc field in struct local", INFORMATION, CALLEE) +INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", INFORMATION, CALLEE) INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE) INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE) INLINE_OBSERVATION(IL_CODE_SIZE, int, "number of bytes of IL", INFORMATION, CALLEE) @@ -112,7 +113,6 @@ INLINE_OBSERVATION(HAS_NEWOBJ, bool, "has newobj", // ------ Call Site Correctness ------- INLINE_OBSERVATION(ARG_HAS_NULL_THIS, bool, "this pointer argument is null", FATAL, CALLSITE) -INLINE_OBSERVATION(ARG_HAS_SIDE_EFFECT, bool, "argument has side effect", FATAL, CALLSITE) INLINE_OBSERVATION(ARG_IS_MKREFANY, bool, "argument is mkrefany", FATAL, CALLSITE) INLINE_OBSERVATION(ARG_NO_BASH_TO_INT, bool, "argument can't bash to int", FATAL, CALLSITE) INLINE_OBSERVATION(ARG_NO_BASH_TO_REF, bool, "argument can't bash to ref", FATAL, CALLSITE) @@ -122,7 +122,6 @@ INLINE_OBSERVATION(CANT_EMBED_VARARGS_COOKIE, bool, "can't embed varargs cooki INLINE_OBSERVATION(CLASS_INIT_FAILURE_SPEC, bool, "speculative class init failed", FATAL, CALLSITE) INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE) INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE) -INLINE_OBSERVATION(CONDITIONAL_THROW, bool, "conditional throw", FATAL, CALLSITE) INLINE_OBSERVATION(CROSS_BOUNDARY_CALLI, bool, "cross-boundary calli", FATAL, CALLSITE) INLINE_OBSERVATION(CROSS_BOUNDARY_SECURITY, bool, "cross-boundary security check", FATAL, CALLSITE) 
INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLSITE) @@ -140,7 +139,7 @@ INLINE_OBSERVATION(IS_TOO_DEEP, bool, "too deep", INLINE_OBSERVATION(IS_VIRTUAL, bool, "virtual", FATAL, CALLSITE) INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLSITE) INLINE_OBSERVATION(IS_WITHIN_CATCH, bool, "within catch region", FATAL, CALLSITE) -INLINE_OBSERVATION(IS_WITHIN_FILTER, bool, "within filterregion", FATAL, CALLSITE) +INLINE_OBSERVATION(IS_WITHIN_FILTER, bool, "within filter region", FATAL, CALLSITE) INLINE_OBSERVATION(LDARGA_NOT_LOCAL_VAR, bool, "ldarga not on local var", FATAL, CALLSITE) INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLSITE) INLINE_OBSERVATION(LDVIRTFN_ON_NON_VIRTUAL, bool, "ldvirtfn on non-virtual", FATAL, CALLSITE) @@ -149,6 +148,7 @@ INLINE_OBSERVATION(NOT_CANDIDATE, bool, "not inline candidate", INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLSITE) INLINE_OBSERVATION(OVER_BUDGET, bool, "inline exceeds budget", FATAL, CALLSITE) INLINE_OBSERVATION(OVER_INLINE_LIMIT, bool, "limited by JitInlineLimit", FATAL, CALLSITE) +INLINE_OBSERVATION(PIN_IN_TRY_REGION, bool, "within try region, pinned", FATAL, CALLSITE) INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLSITE) INLINE_OBSERVATION(REQUIRES_SAME_THIS, bool, "requires same this", FATAL, CALLSITE) INLINE_OBSERVATION(RETURN_TYPE_MISMATCH, bool, "return type mismatch", FATAL, CALLSITE) @@ -157,12 +157,14 @@ INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", // ------ Call Site Performance ------- +INLINE_OBSERVATION(RARE_GC_STRUCT, bool, "rarely called, has gc struct", INFORMATION, CALLSITE) // ------ Call Site Information ------- INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE) INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE) INLINE_OBSERVATION(FREQUENCY, int, "rough call site frequency", INFORMATION, CALLSITE) +INLINE_OBSERVATION(IN_TRY_REGION, bool, "call site in try region", INFORMATION, CALLSITE) INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLSITE) INLINE_OBSERVATION(IS_SAME_THIS, bool, "same this as root caller", INFORMATION, CALLSITE) INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLSITE) diff --git a/src/jit/inline.h b/src/jit/inline.h index e3d5750754..2634ebe6fa 100644 --- a/src/jit/inline.h +++ b/src/jit/inline.h @@ -85,11 +85,6 @@ const unsigned int MAX_INL_ARGS = 10; // does not include obj pointer const unsigned int MAX_INL_LCLS = 8; #endif // LEGACY_BACKEND -// Flags lost during inlining. - -#define CORJIT_FLG_LOST_WHEN_INLINING \ - (CORJIT_FLG_BBOPT | CORJIT_FLG_BBINSTR | CORJIT_FLG_PROF_ENTERLEAVE | CORJIT_FLG_DEBUG_EnC | CORJIT_FLG_DEBUG_INFO) - // Forward declarations class InlineStrategy; @@ -542,6 +537,7 @@ struct InlLclVarInfo var_types lclTypeInfo; typeInfo lclVerTypeInfo; bool lclHasLdlocaOp; // Is there LDLOCA(s) operation on this argument? + bool lclIsPinned; }; // InlineInfo provides detailed information about a particular inline candidate. @@ -568,12 +564,13 @@ struct InlineInfo InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig bool thisDereferencedFirst; + bool hasPinnedLocals; #ifdef FEATURE_SIMD bool hasSIMDTypeArgLocalOrReturn; #endif // FEATURE_SIMD GenTreeCall* iciCall; // The GT_CALL node to be inlined. - GenTree* iciStmt; // The statement iciCall is in. 
+ GenTreeStmt* iciStmt; // The statement iciCall is in. BasicBlock* iciBlock; // The basic block iciStmt is in. }; @@ -706,7 +703,7 @@ public: InlineContext* NewSuccess(InlineInfo* inlineInfo); // Create context for a failing inline. - InlineContext* NewFailure(GenTree* stmt, InlineResult* inlineResult); + InlineContext* NewFailure(GenTreeStmt* stmt, InlineResult* inlineResult); // Compiler associated with this strategy Compiler* GetCompiler() const @@ -823,6 +820,9 @@ public: m_MethodXmlFilePosition = val; } + // Set up or access random state (for use by RandomPolicy) + CLRRandom* GetRandom(); + #endif // defined(DEBUG) || defined(INLINE_DATA) // Some inline limit values @@ -887,7 +887,8 @@ private: bool m_HasForceViaDiscretionary; #if defined(DEBUG) || defined(INLINE_DATA) - long m_MethodXmlFilePosition; + long m_MethodXmlFilePosition; + CLRRandom* m_Random; #endif // defined(DEBUG) || defined(INLINE_DATA) }; diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp index f80f3a5ec0..61e70c3ed4 100644 --- a/src/jit/inlinepolicy.cpp +++ b/src/jit/inlinepolicy.cpp @@ -27,22 +27,22 @@ InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot) { -#ifdef DEBUG +#if defined(DEBUG) || defined(INLINE_DATA) - // Optionally install the RandomPolicy. - bool useRandomPolicy = compiler->compRandomInlineStress(); +#if defined(DEBUG) + const bool useRandomPolicyForStress = compiler->compRandomInlineStress(); +#else + const bool useRandomPolicyForStress = false; +#endif // defined(DEBUG) + + const bool useRandomPolicy = (JitConfig.JitInlinePolicyRandom() != 0); - if (useRandomPolicy) + // Optionally install the RandomPolicy. + if (useRandomPolicyForStress || useRandomPolicy) { - unsigned seed = getJitStressLevel(); - assert(seed != 0); - return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot, seed); + return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot); } -#endif // DEBUG - -#if defined(DEBUG) || defined(INLINE_DATA) - // Optionally install the ReplayPolicy. bool useReplayPolicy = JitConfig.JitInlinePolicyReplay() != 0; @@ -106,7 +106,7 @@ InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot) void LegalPolicy::NoteFatal(InlineObservation obs) { // As a safeguard, all fatal impact must be - // reported via noteFatal. + // reported via NoteFatal. assert(InlGetImpact(obs) == InlineImpact::FATAL); NoteInternal(obs); assert(InlDecisionIsFailure(m_Decision)); @@ -243,7 +243,7 @@ void LegacyPolicy::NoteBool(InlineObservation obs, bool value) InlineImpact impact = InlGetImpact(obs); // As a safeguard, all fatal impact must be - // reported via noteFatal. + // reported via NoteFatal. assert(impact != InlineImpact::FATAL); // Handle most information here @@ -383,6 +383,12 @@ void LegacyPolicy::NoteBool(InlineObservation obs, bool value) break; } + case InlineObservation::CALLEE_HAS_PINNED_LOCALS: + // The legacy policy is to never inline methods with + // pinned locals. + SetNever(obs); + break; + default: // Ignore the remainder for now break; @@ -443,16 +449,16 @@ void LegacyPolicy::NoteInt(InlineObservation obs, int value) // Now that we know size and forceinline state, // update candidacy. 
- if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE) - { - // Candidate based on small size - SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE); - } - else if (m_IsForceInline) + if (m_IsForceInline) { // Candidate based on force inline SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE); } + else if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE) + { + // Candidate based on small size + SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE); + } else if (m_CodeSize <= m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize()) { // Candidate, pending profitability evaluation @@ -842,11 +848,21 @@ int LegacyPolicy::CodeSizeEstimate() // NoteBool: handle a boolean observation with non-fatal impact // // Arguments: -// obs - the current obsevation +// obs - the current observation // value - the value of the observation void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value) { + +#ifdef DEBUG + // Check the impact + InlineImpact impact = InlGetImpact(obs); + + // As a safeguard, all fatal impact must be + // reported via NoteFatal. + assert(impact != InlineImpact::FATAL); +#endif // DEBUG + switch (obs) { case InlineObservation::CALLEE_DOES_NOT_RETURN: @@ -854,6 +870,36 @@ void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value) m_IsNoReturnKnown = true; break; + case InlineObservation::CALLSITE_RARE_GC_STRUCT: + // If this is a discretionary or always inline candidate + // with a gc struct, we may change our mind about inlining + // if the call site is rare, to avoid costs associated with + // zeroing the GC struct up in the root prolog. + if (m_Observation == InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE) + { + assert(m_CallsiteFrequency == InlineCallsiteFrequency::UNUSED); + SetFailure(obs); + return; + } + else if (m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE) + { + assert(m_CallsiteFrequency == InlineCallsiteFrequency::RARE); + SetFailure(obs); + return; + } + break; + + case InlineObservation::CALLEE_HAS_PINNED_LOCALS: + if (m_CallsiteIsInTryRegion) + { + // Inlining a method with pinned locals in a try + // region requires wrapping the inline body in a + // try/finally to ensure unpinning. Bail instead. + SetFailure(InlineObservation::CALLSITE_PIN_IN_TRY_REGION); + return; + } + break; + default: // Pass all other information to the legacy policy LegacyPolicy::NoteBool(obs, value); @@ -928,7 +974,7 @@ bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const return propagate; } -#ifdef DEBUG +#if defined(DEBUG) || defined(INLINE_DATA) //------------------------------------------------------------------------ // RandomPolicy: construct a new RandomPolicy @@ -936,89 +982,10 @@ bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const // Arguments: // compiler -- compiler instance doing the inlining (root compiler) // isPrejitRoot -- true if this compiler is prejitting the root method -// seed -- seed value for the random number generator - -RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed) - : LegalPolicy(isPrejitRoot) - , m_RootCompiler(compiler) - , m_Random(nullptr) - , m_CodeSize(0) - , m_IsForceInline(false) - , m_IsForceInlineKnown(false) -{ - // If necessary, setup and seed the random state. 
- if (compiler->inlRNG == nullptr) - { - compiler->inlRNG = new (compiler, CMK_Inlining) CLRRandom(); - unsigned hash = m_RootCompiler->info.compMethodHash(); - assert(hash != 0); - assert(seed != 0); - int hashSeed = static_cast<int>(hash ^ seed); - compiler->inlRNG->Init(hashSeed); - } - - m_Random = compiler->inlRNG; -} - -//------------------------------------------------------------------------ -// NoteSuccess: handle finishing all the inlining checks successfully - -void RandomPolicy::NoteSuccess() +RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot) { - assert(InlDecisionIsCandidate(m_Decision)); - m_Decision = InlineDecision::SUCCESS; -} - -//------------------------------------------------------------------------ -// NoteBool: handle a boolean observation with non-fatal impact -// -// Arguments: -// obs - the current obsevation -// value - the value of the observation -void RandomPolicy::NoteBool(InlineObservation obs, bool value) -{ - // Check the impact - InlineImpact impact = InlGetImpact(obs); - - // As a safeguard, all fatal impact must be - // reported via noteFatal. - assert(impact != InlineImpact::FATAL); - - // Handle most information here - bool isInformation = (impact == InlineImpact::INFORMATION); - bool propagate = !isInformation; - - if (isInformation) - { - switch (obs) - { - case InlineObservation::CALLEE_IS_FORCE_INLINE: - // The RandomPolicy still honors force inlines. - // - // We may make the force-inline observation more than - // once. All observations should agree. - assert(!m_IsForceInlineKnown || (m_IsForceInline == value)); - m_IsForceInline = value; - m_IsForceInlineKnown = true; - break; - - case InlineObservation::CALLEE_HAS_SWITCH: - case InlineObservation::CALLEE_UNSUPPORTED_OPCODE: - // Pass these on, they should cause inlining to fail. - propagate = true; - break; - - default: - // Ignore the remainder for now - break; - } - } - - if (propagate) - { - NoteInternal(obs); - } + m_Random = compiler->m_inlineStrategy->GetRandom(); } //------------------------------------------------------------------------ @@ -1032,7 +999,6 @@ void RandomPolicy::NoteInt(InlineObservation obs, int value) { switch (obs) { - case InlineObservation::CALLEE_IL_CODE_SIZE: { assert(m_IsForceInlineKnown); @@ -1054,7 +1020,8 @@ void RandomPolicy::NoteInt(InlineObservation obs, int value) } default: - // Ignore all other information + // Defer to superclass for all other information + DiscretionaryPolicy::NoteInt(obs, value); break; } } @@ -1087,6 +1054,16 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) } } + // If we're also dumping inline data, make additional observations + // based on the method info, and estimate code size and perf + // impact, so that the reports have the necessary data. + if (JitConfig.JitInlineDumpData() != 0) + { + MethodInfoObservations(methodInfo); + EstimateCodeSize(); + EstimatePerformanceImpact(); + } + // Use a probability curve that roughly matches the observed // behavior of the LegacyPolicy. That way we're inlining // differently but not creating enormous methods. 
@@ -1165,7 +1142,7 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) } } -#endif // DEBUG +#endif // defined(DEBUG) || defined(INLINE_DATA) #ifdef _MSC_VER // Disable warning about new array member initialization behavior @@ -1181,7 +1158,7 @@ void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) // clang-format off DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot) - : LegacyPolicy(compiler, isPrejitRoot) + : EnhancedLegacyPolicy(compiler, isPrejitRoot) , m_Depth(0) , m_BlockCount(0) , m_Maxstack(0) @@ -1227,6 +1204,7 @@ DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot) , m_IsSameThis(false) , m_CallerHasNewArray(false) , m_CallerHasNewObj(false) + , m_CalleeHasGCStruct(false) { // Empty } @@ -1278,8 +1256,17 @@ void DiscretionaryPolicy::NoteBool(InlineObservation obs, bool value) m_CallerHasNewObj = value; break; + case InlineObservation::CALLEE_HAS_GC_STRUCT: + m_CalleeHasGCStruct = value; + break; + + case InlineObservation::CALLSITE_RARE_GC_STRUCT: + // This is redundant since this policy tracks call site + // hotness for all candidates. So ignore. + break; + default: - LegacyPolicy::NoteBool(obs, value); + EnhancedLegacyPolicy::NoteBool(obs, value); break; } } @@ -1295,7 +1282,6 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value) { switch (obs) { - case InlineObservation::CALLEE_IL_CODE_SIZE: // Override how code size is handled { @@ -1323,7 +1309,7 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value) // on similarity of impact on codegen. OPCODE opcode = static_cast<OPCODE>(value); ComputeOpcodeBin(opcode); - LegacyPolicy::NoteInt(obs, value); + EnhancedLegacyPolicy::NoteInt(obs, value); break; } @@ -1344,8 +1330,8 @@ void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value) break; default: - // Delegate remainder to the LegacyPolicy. - LegacyPolicy::NoteInt(obs, value); + // Delegate remainder to the super class. + EnhancedLegacyPolicy::NoteInt(obs, value); break; } } @@ -1660,8 +1646,8 @@ void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo // model for actual inlining. 
EstimatePerformanceImpact(); - // Delegate to LegacyPolicy for the rest - LegacyPolicy::DetermineProfitability(methodInfo); + // Delegate to super class for the rest + EnhancedLegacyPolicy::DetermineProfitability(methodInfo); } //------------------------------------------------------------------------ @@ -1869,7 +1855,7 @@ int DiscretionaryPolicy::CodeSizeEstimate() void DiscretionaryPolicy::DumpSchema(FILE* file) const { - fprintf(file, ",ILSize"); + fprintf(file, "ILSize"); fprintf(file, ",CallsiteFrequency"); fprintf(file, ",InstructionCount"); fprintf(file, ",LoadStoreCount"); @@ -1938,6 +1924,8 @@ void DiscretionaryPolicy::DumpSchema(FILE* file) const fprintf(file, ",IsSameThis"); fprintf(file, ",CallerHasNewArray"); fprintf(file, ",CallerHasNewObj"); + fprintf(file, ",CalleeDoesNotReturn"); + fprintf(file, ",CalleeHasGCStruct"); } //------------------------------------------------------------------------ @@ -1949,7 +1937,7 @@ void DiscretionaryPolicy::DumpSchema(FILE* file) const void DiscretionaryPolicy::DumpData(FILE* file) const { - fprintf(file, ",%u", m_CodeSize); + fprintf(file, "%u", m_CodeSize); fprintf(file, ",%u", m_CallsiteFrequency); fprintf(file, ",%u", m_InstructionCount); fprintf(file, ",%u", m_LoadStoreCount); @@ -2018,6 +2006,8 @@ void DiscretionaryPolicy::DumpData(FILE* file) const fprintf(file, ",%u", m_IsSameThis ? 1 : 0); fprintf(file, ",%u", m_CallerHasNewArray ? 1 : 0); fprintf(file, ",%u", m_CallerHasNewObj ? 1 : 0); + fprintf(file, ",%u", m_IsNoReturn ? 1 : 0); + fprintf(file, ",%u", m_CalleeHasGCStruct ? 1 : 0); } #endif // defined(DEBUG) || defined(INLINE_DATA) @@ -2473,7 +2463,7 @@ bool ReplayPolicy::FindMethod() // See if token matches unsigned token = 0; - int count = sscanf(buffer, " <Token>%u</Token> ", &token); + int count = sscanf_s(buffer, " <Token>%u</Token> ", &token); if ((count != 1) || (token != methodToken)) { continue; @@ -2487,7 +2477,7 @@ bool ReplayPolicy::FindMethod() // See if hash matches unsigned hash = 0; - count = sscanf(buffer, " <Hash>%u</Hash> ", &hash); + count = sscanf_s(buffer, " <Hash>%u</Hash> ", &hash); if ((count != 1) || (hash != methodHash)) { continue; @@ -2646,7 +2636,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset) // Match token unsigned inlineToken = 0; - int count = sscanf(buffer, " <Token>%u</Token> ", &inlineToken); + int count = sscanf_s(buffer, " <Token>%u</Token> ", &inlineToken); if ((count != 1) || (inlineToken != token)) { @@ -2661,7 +2651,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset) // Match hash unsigned inlineHash = 0; - count = sscanf(buffer, " <Hash>%u</Hash> ", &inlineHash); + count = sscanf_s(buffer, " <Hash>%u</Hash> ", &inlineHash); if ((count != 1) || (inlineHash != hash)) { @@ -2676,7 +2666,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset) // Match offset unsigned inlineOffset = 0; - count = sscanf(buffer, " <Offset>%u</Offset> ", &inlineOffset); + count = sscanf_s(buffer, " <Offset>%u</Offset> ", &inlineOffset); if ((count != 1) || (inlineOffset != offset)) { continue; @@ -2695,7 +2685,7 @@ bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset) if (fgets(buffer, sizeof(buffer), s_ReplayFile) != nullptr) { unsigned collectData = 0; - count = sscanf(buffer, " <CollectData>%u</CollectData> ", &collectData); + count = sscanf_s(buffer, " <CollectData>%u</CollectData> ", &collectData); if (count == 1) { diff --git a/src/jit/inlinepolicy.h b/src/jit/inlinepolicy.h index 
62031c86a0..3239dcbe89 100644 --- a/src/jit/inlinepolicy.h +++ b/src/jit/inlinepolicy.h @@ -98,6 +98,7 @@ public: , m_HasSimd(false) , m_LooksLikeWrapperMethod(false) , m_MethodIsMostlyLoadStore(false) + , m_CallsiteIsInTryRegion(false) { // empty } @@ -165,6 +166,7 @@ protected: bool m_HasSimd : 1; bool m_LooksLikeWrapperMethod : 1; bool m_MethodIsMostlyLoadStore : 1; + bool m_CallsiteIsInTryRegion : 1; }; // EnhancedLegacyPolicy extends the legacy policy by rejecting @@ -196,65 +198,15 @@ protected: bool m_IsNoReturnKnown : 1; }; -#ifdef DEBUG - -// RandomPolicy implements a policy that inlines at random. -// It is mostly useful for stress testing. - -class RandomPolicy : public LegalPolicy -{ -public: - // Construct a RandomPolicy - RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed); - - // Policy observations - void NoteSuccess() override; - void NoteBool(InlineObservation obs, bool value) override; - void NoteInt(InlineObservation obs, int value) override; - - // Policy determinations - void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override; - - // Policy policies - bool PropagateNeverToRuntime() const override - { - return true; - } - bool IsLegacyPolicy() const override - { - return false; - } - - // Policy estimates - int CodeSizeEstimate() override - { - return 0; - } - - const char* GetName() const override - { - return "RandomPolicy"; - } - -private: - // Data members - Compiler* m_RootCompiler; - CLRRandom* m_Random; - unsigned m_CodeSize; - bool m_IsForceInline : 1; - bool m_IsForceInlineKnown : 1; -}; - -#endif // DEBUG - -// DiscretionaryPolicy is a variant of the legacy policy. It differs -// in that there is no ALWAYS_INLINE class, there is no IL size limit, -// it does not try and maintain legacy compatabilty, and in prejit mode, -// discretionary failures do not set the "NEVER" inline bit. +// DiscretionaryPolicy is a variant of the enhanced legacy policy. It +// differs in that there is no ALWAYS_INLINE class, there is no IL +// size limit, it does not try and maintain legacy compatabilty, and +// in prejit mode, discretionary failures do not set the "NEVER" +// inline bit. // // It is useful for gathering data about inline costs. -class DiscretionaryPolicy : public LegacyPolicy +class DiscretionaryPolicy : public EnhancedLegacyPolicy { public: // Construct a DiscretionaryPolicy @@ -266,10 +218,6 @@ public: // Policy policies bool PropagateNeverToRuntime() const override; - bool IsLegacyPolicy() const override - { - return false; - } // Policy determinations void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override; @@ -346,6 +294,7 @@ protected: bool m_IsSameThis; bool m_CallerHasNewArray; bool m_CallerHasNewObj; + bool m_CalleeHasGCStruct; }; // ModelPolicy is an experimental policy that uses the results @@ -382,6 +331,35 @@ public: #if defined(DEBUG) || defined(INLINE_DATA) +// RandomPolicy implements a policy that inlines at random. +// It is mostly useful for stress testing. 
+ +class RandomPolicy : public DiscretionaryPolicy +{ +public: + // Construct a RandomPolicy + RandomPolicy(Compiler* compiler, bool isPrejitRoot); + + // Policy observations + void NoteInt(InlineObservation obs, int value) override; + + // Policy determinations + void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override; + + const char* GetName() const override + { + return "RandomPolicy"; + } + +private: + // Data members + CLRRandom* m_Random; +}; + +#endif // defined(DEBUG) || defined(INLINE_DATA) + +#if defined(DEBUG) || defined(INLINE_DATA) + // FullPolicy is an experimental policy that will always inline if // possible, subject to externally settable depth and size limits. // diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index d516e0dea4..edc4483c6b 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -149,8 +149,6 @@ const char* CodeGen::genSizeStr(emitAttr attr) nullptr, "xmmword ptr ", nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, "ymmword ptr" }; @@ -3054,7 +3052,7 @@ bool CodeGenInterface::validImmForBL(ssize_t addr) return // If we are running the altjit for NGEN, then assume we can use the "BL" instruction. // This matches the usual behavior for NGEN, since we normally do generate "BL". - (!compiler->info.compMatchedVM && (compiler->opts.eeFlags & CORJIT_FLG_PREJIT)) || + (!compiler->info.compMatchedVM && compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) || (compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24); } bool CodeGen::arm_Valid_Imm_For_BL(ssize_t addr) @@ -3240,7 +3238,7 @@ instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg) * * Parameters * srcType - source type - * aligned - whether source is 16-byte aligned if srcType is a SIMD type + * aligned - whether source is properly aligned if srcType is a SIMD type */ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*/) { @@ -3258,8 +3256,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #endif // FEATURE_SIMD if (compiler->canUseAVX()) { - // TODO-CQ: consider alignment of AVX vectors. - return INS_movupd; + return (aligned) ? INS_movapd : INS_movupd; } else { @@ -3404,7 +3401,7 @@ instruction CodeGen::ins_Copy(var_types dstType) * * Parameters * dstType - destination type - * aligned - whether destination is 16-byte aligned if dstType is a SIMD type + * aligned - whether destination is properly aligned if dstType is a SIMD type */ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/) { @@ -3422,8 +3419,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #endif // FEATURE_SIMD if (compiler->canUseAVX()) { - // TODO-CQ: consider alignment of AVX vectors. - return INS_movupd; + return (aligned) ? INS_movapd : INS_movupd; } else { diff --git a/src/jit/instr.h b/src/jit/instr.h index c38f8d2073..2d50234fdc 100644 --- a/src/jit/instr.h +++ b/src/jit/instr.h @@ -284,15 +284,19 @@ END_DECLARE_TYPED_ENUM(emitAttr,unsigned) #define EmitSize(x) (EA_ATTR(genTypeSize(TypeGet(x)))) // Enum specifying the instruction set for generating floating point or SIMD code. 
+// These enums are ordered such that each one is inclusive of previous instruction sets +// and the VM ensures this as well when setting the CONFIG flags. enum InstructionSet { #ifdef _TARGET_XARCH_ - InstructionSet_SSE2, - InstructionSet_AVX, + InstructionSet_SSE2, // SSE2 Instruction set + InstructionSet_SSE3_4, // SSE3, SSSE3, SSE4.1 and SSE4.2 instruction set + InstructionSet_AVX, // AVX2 instruction set + // TODO-Cleaup - This should be named as InstructionSet_AVX2 #elif defined(_TARGET_ARM_) InstructionSet_NEON, #endif - InstructionSet_NONE + InstructionSet_NONE // No instruction set is available indicating an invalid value }; // clang-format on diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index 436563babf..4317334bf2 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -178,6 +178,7 @@ INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CO // These are the SSE instructions used on x86 INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg +INST3( pmovmskb, "pmovmskb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD7)) // Move the MSB bits of all bytes in a xmm reg to an int reg INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E)) INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10)) @@ -317,6 +318,8 @@ INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit integers for equality INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result +INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare +INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE) @@ -367,25 +370,25 @@ INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0) INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3) INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548) -#endif // !LEGACY_BACKEND +#endif // !LEGACY_BACKEND || !defined(_TARGET_AMD64_) INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4) INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548) -#endif // !LEGACY_BACKEND +#endif // !LEGACY_BACKEND || !defined(_TARGET_AMD64_) INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3) INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48) -#endif // !LEGACY_BACKEND +#endif // !LEGACY_BACKEND || !defined(_TARGET_AMD64_) INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA) 
INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB) -#ifndef LEGACY_BACKEND +#if !defined(LEGACY_BACKEND) && defined(_TARGET_AMD64_) INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48) -#endif // !LEGACY_BACKEND +#endif // !LEGACY_BACKEND || !defined(_TARGET_AMD64_) INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC) INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090) diff --git a/src/jit/jit.h b/src/jit/jit.h index 7bf5cd4051..220294f825 100644 --- a/src/jit/jit.h +++ b/src/jit/jit.h @@ -28,6 +28,7 @@ #ifdef _MSC_VER // These don't seem useful, so turning them off is no big deal +#pragma warning(disable : 4065) // "switch statement contains 'default' but no 'case' labels" (happens due to #ifdefs) #pragma warning(disable : 4510) // can't generate default constructor #pragma warning(disable : 4511) // can't generate copy constructor #pragma warning(disable : 4512) // can't generate assignment constructor @@ -209,6 +210,7 @@ #include "corhdr.h" #include "corjit.h" +#include "jitee.h" #define __OPERATOR_NEW_INLINE 1 // indicate that I will define these #define __PLACEMENT_NEW_INLINE // don't bring in the global placement new, it is easy to make a mistake @@ -259,6 +261,15 @@ struct CLRConfig #define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)) +#define FEATURE_PUT_STRUCT_ARG_STK 1 +#define PUT_STRUCT_ARG_STK_ONLY_ARG(x) , x +#define PUT_STRUCT_ARG_STK_ONLY(x) x +#else // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))) +#define PUT_STRUCT_ARG_STK_ONLY_ARG(x) +#define PUT_STRUCT_ARG_STK_ONLY(x) +#endif // !(defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)|| (defined(_TARGET_X86_) && !defined(LEGACY_BACKEND))) + #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x #define UNIX_AMD64_ABI_ONLY(x) x @@ -377,17 +388,6 @@ typedef ptrdiff_t ssize_t; /*****************************************************************************/ -// Debugging support is ON by default. Can be turned OFF by -// adding /DDEBUGGING_SUPPORT=0 on the command line. - -#ifndef DEBUGGING_SUPPORT -#define DEBUGGING_SUPPORT -#elif !DEBUGGING_SUPPORT -#undef DEBUGGING_SUPPORT -#endif - -/*****************************************************************************/ - // Late disassembly is OFF by default. Can be turned ON by // adding /DLATE_DISASM=1 on the command line. // Always OFF in the non-debug version @@ -465,6 +465,8 @@ typedef ptrdiff_t ssize_t; #define MEASURE_NODE_SIZE 0 // Collect stats about GenTree node allocations. #define MEASURE_PTRTAB_SIZE 0 // Collect stats about GC pointer table allocations. #define EMITTER_STATS 0 // Collect stats on the emitter. +#define NODEBASH_STATS 0 // Collect stats on changed gtOper values in GenTree's. +#define COUNT_AST_OPERS 0 // Display use counts for GenTree operators. #define VERBOSE_SIZES 0 // Always display GC info sizes. If set, DISPLAY_SIZES must also be set. #define VERBOSE_VERIFY 0 // Dump additional information when verifying code. Useful to debug verification bugs. @@ -472,9 +474,30 @@ typedef ptrdiff_t ssize_t; #ifdef DEBUG #define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats. #define LOOP_HOIST_STATS 1 // Collect loop hoisting stats. 
+#define TRACK_LSRA_STATS 1 // Collect LSRA stats #else #define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well #define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well +#define TRACK_LSRA_STATS 0 // You can set this to 1 to get LSRA stats in retail, as well +#endif + +// Timing calls to clr.dll is only available under certain conditions. +#ifndef FEATURE_JIT_METHOD_PERF +#define MEASURE_CLRAPI_CALLS 0 // Can't time these calls without METHOD_PERF. +#endif +#ifdef DEBUG +#define MEASURE_CLRAPI_CALLS 0 // No point in measuring DEBUG code. +#endif +#if !defined(_HOST_X86_) && !defined(_HOST_AMD64_) +#define MEASURE_CLRAPI_CALLS 0 // Cycle counters only hooked up on x86/x64. +#endif +#if !defined(_MSC_VER) && !defined(__clang__) +#define MEASURE_CLRAPI_CALLS 0 // Only know how to do this with VC and Clang. +#endif + +// If none of the above set the flag to 0, it's available. +#ifndef MEASURE_CLRAPI_CALLS +#define MEASURE_CLRAPI_CALLS 0 // Set to 1 to measure time in ICorJitInfo calls. #endif /*****************************************************************************/ @@ -686,7 +709,7 @@ inline size_t unsigned_abs(ssize_t x) /*****************************************************************************/ -#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE +#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC class Histogram { @@ -807,7 +830,7 @@ extern int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd, CORINFO_METHOD_INFO* methodInfo, void** methodCodePtr, ULONG* methodCodeSize, - CORJIT_FLAGS* compileFlags, + JitFlags* compileFlags, void* inlineInfoPtr); #ifdef _HOST_64BIT_ diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets index 9dbc225843..6c0474a00c 100644 --- a/src/jit/jit.settings.targets +++ b/src/jit/jit.settings.targets @@ -86,10 +86,11 @@ <CppCompile Include="..\jitconfig.cpp" /> <CppCompile Include="..\hostallocator.cpp" /> <CppCompile Include="..\objectalloc.cpp" /> - <CppCompile Inlcude="..\sideeffects.cpp" /> + <CppCompile Include="..\sideeffects.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\CodeGenLegacy.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\Lower.cpp" /> <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LSRA.cpp" /> + <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\codegenlinear.cpp" /> </ItemGroup> <ItemGroup Condition="'$(TargetArch)'=='i386'"> <CppCompile Include="..\emitXArch.cpp" /> diff --git a/src/jit/jitconfig.h b/src/jit/jitconfig.h index d5b4e30796..9186e12982 100644 --- a/src/jit/jitconfig.h +++ b/src/jit/jitconfig.h @@ -5,6 +5,8 @@ #ifndef _JITCONFIG_H_ #define _JITCONFIG_H_ +#include "switches.h" + struct CORINFO_SIG_INFO; class ICorJitHost; diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h index 6579817249..39a2505246 100644 --- a/src/jit/jitconfigvalues.h +++ b/src/jit/jitconfigvalues.h @@ -17,10 +17,10 @@ CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailu // verification failure CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics -CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT 
memory usage statistics -CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console -CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by - // RyuJIT where possible +CONFIG_INTEGER(DisplayLsraStats, W("JitLsraStats"), 0) // Display JIT Linear Scan Register Allocator statistics +CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console +CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether absolute addr be encoded as PC-rel offset by + // RyuJIT where possible CONFIG_INTEGER(InterpreterFallback, W("InterpreterFallback"), 0) // Fallback to the interpreter when the JIT compiler // fails CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0) @@ -154,10 +154,12 @@ CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // D // exception handling CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s) CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method -CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen -CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen -CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen -CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM +CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on the method +CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2) // Number of times to repeat opts when repeating +CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen +CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen +CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen +CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM CONFIG_METHODSET(NgenGCDump, W("NgenGCDump")) CONFIG_METHODSET(NgenUnwindDump, W("NgenUnwindDump")) // Dump the unwind codes for the method CONFIG_STRING(JitDumpFg, W("JitDumpFg")) // Dumps Xml/Dot Flowgraph for specified method @@ -186,6 +188,10 @@ CONFIG_STRING(NgenDumpIRFormat, W("NgenDumpIRFormat")) // Same as JitD CONFIG_STRING(NgenDumpIRPhase, W("NgenDumpIRPhase")) // Same as JitDumpIRPhase, but for ngen #endif // defined(DEBUG) +#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS +CONFIG_INTEGER(JitNoRangeChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks +#endif + // AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring-up stage, so as to facilitate // fallback to main JIT on hitting a NYI. 
#if defined(_TARGET_ARM64_) || defined(_TARGET_X86_) @@ -194,11 +200,17 @@ CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the Alt CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff #endif // defined(_TARGET_ARM64_) || defined(_TARGET_X86_) +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) +CONFIG_INTEGER(EnableSSE3_4, W("EnableSSE3_4"), 1) // Enable SSE3, SSSE3, SSE 4.1 and 4.2 instruction set as default +#endif + #if defined(_TARGET_AMD64_) -CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default -#else // !defined(_TARGET_AMD64_) +CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default. +// When both AVX and SSE3_4 are set, we will use the most capable instruction set available +// which will prefer AVX over SSE3/4. +#else // !defined(_TARGET_AMD64_) CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default -#endif // defined(_TARGET_AMD64_) +#endif // defined(_TARGET_AMD64_) #if !defined(DEBUG) && !defined(_DEBUG) CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0) @@ -206,9 +218,17 @@ CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0) CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 1) #endif // !defined(DEBUG) && !defined(_DEBUG) +// The following should be wrapped inside "#if MEASURE_MEM_ALLOC / #endif", but +// some files include this one without bringing in the definitions from "jit.h" +// so we don't always know what the "true" value of that flag should be. For now +// we take the easy way out and always include the flag, even in release builds +// (normally MEASURE_MEM_ALLOC is off for release builds but if it's toggled on +// for release in "jit.h" the flag would be missing for some includers). +// TODO-Cleanup: need to make 'MEASURE_MEM_ALLOC' well-defined here at all times. 
+CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT memory usage statistics + CONFIG_INTEGER(JitAggressiveInlining, W("JitAggressiveInlining"), 0) // Aggressive inlining of all methods -CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // On ARM, setting this will emit Enter/Leave/TailCall - // callbacks +CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // If 1, emit Enter/Leave/TailCall callbacks CONFIG_INTEGER(JitInlineSIMDMultiplier, W("JitInlineSIMDMultiplier"), 3) #if defined(FEATURE_ENABLE_NO_RANGE_CHECKS) @@ -242,6 +262,8 @@ CONFIG_INTEGER(JitInlineLimit, W("JitInlineLimit"), -1) CONFIG_INTEGER(JitInlinePolicyDiscretionary, W("JitInlinePolicyDiscretionary"), 0) CONFIG_INTEGER(JitInlinePolicyFull, W("JitInlinePolicyFull"), 0) CONFIG_INTEGER(JitInlinePolicySize, W("JitInlinePolicySize"), 0) +CONFIG_INTEGER(JitInlinePolicyRandom, W("JitInlinePolicyRandom"), 0) // nozero enables; value is the external random + // seed CONFIG_INTEGER(JitInlinePolicyReplay, W("JitInlinePolicyReplay"), 0) CONFIG_STRING(JitNoInlineRange, W("JitNoInlineRange")) CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) @@ -250,6 +272,8 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) CONFIG_INTEGER(JitInlinePolicyLegacy, W("JitInlinePolicyLegacy"), 0) CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) +CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0) + #undef CONFIG_INTEGER #undef CONFIG_STRING #undef CONFIG_METHODSET diff --git a/src/jit/jitee.h b/src/jit/jitee.h new file mode 100644 index 0000000000..f9bd83f5bb --- /dev/null +++ b/src/jit/jitee.h @@ -0,0 +1,264 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// This class wraps the CORJIT_FLAGS type in the JIT-EE interface (in corjit.h) such that the JIT can +// build with either the old flags (COR_JIT_EE_VERSION <= 460) or the new flags (COR_JIT_EE_VERSION > 460). +// It actually is exactly the same as the new definition, and must be kept up-to-date with the new definition. +// When built against an old JIT-EE interface, the old flags are converted into this structure. +class JitFlags +{ +public: + // clang-format off + enum JitFlag + { + JIT_FLAG_SPEED_OPT = 0, + JIT_FLAG_SIZE_OPT = 1, + JIT_FLAG_DEBUG_CODE = 2, // generate "debuggable" code (no code-mangling optimizations) + JIT_FLAG_DEBUG_EnC = 3, // We are in Edit-n-Continue mode + JIT_FLAG_DEBUG_INFO = 4, // generate line and local-var info + JIT_FLAG_MIN_OPT = 5, // disable all jit optimizations (not necesarily debuggable code) + JIT_FLAG_GCPOLL_CALLS = 6, // Emit calls to JIT_POLLGC for thread suspension. 
+ JIT_FLAG_MCJIT_BACKGROUND = 7, // Calling from multicore JIT background thread, do not call JitComplete + + #if defined(_TARGET_X86_) + + JIT_FLAG_PINVOKE_RESTORE_ESP = 8, // Restore ESP after returning from inlined PInvoke + JIT_FLAG_TARGET_P4 = 9, + JIT_FLAG_USE_FCOMI = 10, // Generated code may use fcomi(p) instruction + JIT_FLAG_USE_CMOV = 11, // Generated code may use cmov instruction + JIT_FLAG_USE_SSE2 = 12, // Generated code may use SSE-2 instructions + + #else // !defined(_TARGET_X86_) + + JIT_FLAG_UNUSED1 = 8, + JIT_FLAG_UNUSED2 = 9, + JIT_FLAG_UNUSED3 = 10, + JIT_FLAG_UNUSED4 = 11, + JIT_FLAG_UNUSED5 = 12, + + #endif // !defined(_TARGET_X86_) + + #if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + + JIT_FLAG_USE_SSE3_4 = 13, + JIT_FLAG_USE_AVX = 14, + JIT_FLAG_USE_AVX2 = 15, + JIT_FLAG_USE_AVX_512 = 16, + JIT_FLAG_FEATURE_SIMD = 17, + + #else // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_) + + JIT_FLAG_UNUSED6 = 13, + JIT_FLAG_UNUSED7 = 14, + JIT_FLAG_UNUSED8 = 15, + JIT_FLAG_UNUSED9 = 16, + JIT_FLAG_UNUSED10 = 17, + + #endif // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_) + + JIT_FLAG_MAKEFINALCODE = 18, // Use the final code generator, i.e., not the interpreter. + JIT_FLAG_READYTORUN = 19, // Use version-resilient code generation + JIT_FLAG_PROF_ENTERLEAVE = 20, // Instrument prologues/epilogues + JIT_FLAG_PROF_REJIT_NOPS = 21, // Insert NOPs to ensure code is re-jitable + JIT_FLAG_PROF_NO_PINVOKE_INLINE = 22, // Disables PInvoke inlining + JIT_FLAG_SKIP_VERIFICATION = 23, // (lazy) skip verification - determined without doing a full resolve. See comment below + JIT_FLAG_PREJIT = 24, // jit or prejit is the execution engine. + JIT_FLAG_RELOC = 25, // Generate relocatable code + JIT_FLAG_IMPORT_ONLY = 26, // Only import the function + JIT_FLAG_IL_STUB = 27, // method is an IL stub + JIT_FLAG_PROCSPLIT = 28, // JIT should separate code into hot and cold sections + JIT_FLAG_BBINSTR = 29, // Collect basic block profile information + JIT_FLAG_BBOPT = 30, // Optimize method based on profile information + JIT_FLAG_FRAMED = 31, // All methods have an EBP frame + JIT_FLAG_ALIGN_LOOPS = 32, // add NOPs before loops to align them at 16 byte boundaries + JIT_FLAG_PUBLISH_SECRET_PARAM = 33, // JIT must place stub secret param into local 0. (used by IL stubs) + JIT_FLAG_GCPOLL_INLINE = 34, // JIT must inline calls to GCPoll when possible + JIT_FLAG_SAMPLING_JIT_BACKGROUND = 35, // JIT is being invoked as a result of stack sampling for hot methods in the background + JIT_FLAG_USE_PINVOKE_HELPERS = 36, // The JIT should use the PINVOKE_{BEGIN,END} helpers instead of emitting inline transitions + JIT_FLAG_REVERSE_PINVOKE = 37, // The JIT should insert REVERSE_PINVOKE_{ENTER,EXIT} helpers into method prolog/epilog + JIT_FLAG_DESKTOP_QUIRKS = 38, // The JIT should generate desktop-quirk-compatible code + }; + // clang-format on + + JitFlags() : m_jitFlags(0) + { + // empty + } + + // Convenience constructor to set exactly one flags. 
+ JitFlags(JitFlag flag) : m_jitFlags(0) + { + Set(flag); + } + + void Reset() + { + m_jitFlags = 0; + } + + void Set(JitFlag flag) + { + m_jitFlags |= 1ULL << (unsigned __int64)flag; + } + + void Clear(JitFlag flag) + { + m_jitFlags &= ~(1ULL << (unsigned __int64)flag); + } + + bool IsSet(JitFlag flag) const + { + return (m_jitFlags & (1ULL << (unsigned __int64)flag)) != 0; + } + + void Add(const JitFlags& other) + { + m_jitFlags |= other.m_jitFlags; + } + + void Remove(const JitFlags& other) + { + m_jitFlags &= ~other.m_jitFlags; + } + + bool IsEmpty() const + { + return m_jitFlags == 0; + } + +#if COR_JIT_EE_VERSION <= 460 + + void SetFromOldFlags(unsigned corJitFlags, unsigned corJitFlags2) + { + Reset(); + +#define CONVERT_OLD_FLAG(oldf, newf) \ + if ((corJitFlags & (oldf)) != 0) \ + this->Set(JitFlags::newf); +#define CONVERT_OLD_FLAG2(oldf, newf) \ + if ((corJitFlags2 & (oldf)) != 0) \ + this->Set(JitFlags::newf); + + CONVERT_OLD_FLAG(CORJIT_FLG_SPEED_OPT, JIT_FLAG_SPEED_OPT) + CONVERT_OLD_FLAG(CORJIT_FLG_SIZE_OPT, JIT_FLAG_SIZE_OPT) + CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_CODE, JIT_FLAG_DEBUG_CODE) + CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC) + CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO) + CONVERT_OLD_FLAG(CORJIT_FLG_MIN_OPT, JIT_FLAG_MIN_OPT) + CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_CALLS, JIT_FLAG_GCPOLL_CALLS) + CONVERT_OLD_FLAG(CORJIT_FLG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND) + +#if defined(_TARGET_X86_) + + CONVERT_OLD_FLAG(CORJIT_FLG_PINVOKE_RESTORE_ESP, JIT_FLAG_PINVOKE_RESTORE_ESP) + CONVERT_OLD_FLAG(CORJIT_FLG_TARGET_P4, JIT_FLAG_TARGET_P4) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_FCOMI, JIT_FLAG_USE_FCOMI) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_CMOV, JIT_FLAG_USE_CMOV) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE2, JIT_FLAG_USE_SSE2) + +#elif defined(_TARGET_AMD64_) + + CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE3_4, JIT_FLAG_USE_SSE3_4) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX, JIT_FLAG_USE_AVX) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX2, JIT_FLAG_USE_AVX2) + CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX_512, JIT_FLAG_USE_AVX_512) + CONVERT_OLD_FLAG(CORJIT_FLG_FEATURE_SIMD, JIT_FLAG_FEATURE_SIMD) + +#endif // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_) + + CONVERT_OLD_FLAG(CORJIT_FLG_MAKEFINALCODE, JIT_FLAG_MAKEFINALCODE) + CONVERT_OLD_FLAG(CORJIT_FLG_READYTORUN, JIT_FLAG_READYTORUN) + CONVERT_OLD_FLAG(CORJIT_FLG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE) + CONVERT_OLD_FLAG(CORJIT_FLG_PROF_REJIT_NOPS, JIT_FLAG_PROF_REJIT_NOPS) + CONVERT_OLD_FLAG(CORJIT_FLG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE) + CONVERT_OLD_FLAG(CORJIT_FLG_SKIP_VERIFICATION, JIT_FLAG_SKIP_VERIFICATION) + CONVERT_OLD_FLAG(CORJIT_FLG_PREJIT, JIT_FLAG_PREJIT) + CONVERT_OLD_FLAG(CORJIT_FLG_RELOC, JIT_FLAG_RELOC) + CONVERT_OLD_FLAG(CORJIT_FLG_IMPORT_ONLY, JIT_FLAG_IMPORT_ONLY) + CONVERT_OLD_FLAG(CORJIT_FLG_IL_STUB, JIT_FLAG_IL_STUB) + CONVERT_OLD_FLAG(CORJIT_FLG_PROCSPLIT, JIT_FLAG_PROCSPLIT) + CONVERT_OLD_FLAG(CORJIT_FLG_BBINSTR, JIT_FLAG_BBINSTR) + CONVERT_OLD_FLAG(CORJIT_FLG_BBOPT, JIT_FLAG_BBOPT) + CONVERT_OLD_FLAG(CORJIT_FLG_FRAMED, JIT_FLAG_FRAMED) + CONVERT_OLD_FLAG(CORJIT_FLG_ALIGN_LOOPS, JIT_FLAG_ALIGN_LOOPS) + CONVERT_OLD_FLAG(CORJIT_FLG_PUBLISH_SECRET_PARAM, JIT_FLAG_PUBLISH_SECRET_PARAM) + CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_INLINE, JIT_FLAG_GCPOLL_INLINE) + + CONVERT_OLD_FLAG2(CORJIT_FLG2_SAMPLING_JIT_BACKGROUND, JIT_FLAG_SAMPLING_JIT_BACKGROUND) + +#undef CONVERT_OLD_FLAG +#undef CONVERT_OLD_FLAG2 + } + +#else // COR_JIT_EE_VERSION > 460 + + void SetFromFlags(CORJIT_FLAGS flags) + { 
+ // We don't want to have to check every one, so we assume it is exactly the same values as the JitFlag + // values defined in this type. + m_jitFlags = flags.GetFlagsRaw(); + + C_ASSERT(sizeof(m_jitFlags) == sizeof(CORJIT_FLAGS)); + +#define FLAGS_EQUAL(a, b) C_ASSERT((unsigned)(a) == (unsigned)(b)) + + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SPEED_OPT, JIT_FLAG_SPEED_OPT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SIZE_OPT, JIT_FLAG_SIZE_OPT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_CODE, JIT_FLAG_DEBUG_CODE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MIN_OPT, JIT_FLAG_MIN_OPT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_GCPOLL_CALLS, JIT_FLAG_GCPOLL_CALLS); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND); + +#if defined(_TARGET_X86_) + + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PINVOKE_RESTORE_ESP, JIT_FLAG_PINVOKE_RESTORE_ESP); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TARGET_P4, JIT_FLAG_TARGET_P4); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_FCOMI, JIT_FLAG_USE_FCOMI); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_CMOV, JIT_FLAG_USE_CMOV); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE2, JIT_FLAG_USE_SSE2); + +#endif + +#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) + + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_SSE3_4, JIT_FLAG_USE_SSE3_4); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX, JIT_FLAG_USE_AVX); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX2, JIT_FLAG_USE_AVX2); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_AVX_512, JIT_FLAG_USE_AVX_512); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_FEATURE_SIMD, JIT_FLAG_FEATURE_SIMD); + +#endif + + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_MAKEFINALCODE, JIT_FLAG_MAKEFINALCODE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_READYTORUN, JIT_FLAG_READYTORUN); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_REJIT_NOPS, JIT_FLAG_PROF_REJIT_NOPS); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SKIP_VERIFICATION, JIT_FLAG_SKIP_VERIFICATION); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PREJIT, JIT_FLAG_PREJIT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELOC, JIT_FLAG_RELOC); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_IMPORT_ONLY, JIT_FLAG_IMPORT_ONLY); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_IL_STUB, JIT_FLAG_IL_STUB); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PROCSPLIT, JIT_FLAG_PROCSPLIT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR, JIT_FLAG_BBINSTR); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_BBOPT, JIT_FLAG_BBOPT); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_FRAMED, JIT_FLAG_FRAMED); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_ALIGN_LOOPS, JIT_FLAG_ALIGN_LOOPS); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_PUBLISH_SECRET_PARAM, JIT_FLAG_PUBLISH_SECRET_PARAM); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_GCPOLL_INLINE, JIT_FLAG_GCPOLL_INLINE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_SAMPLING_JIT_BACKGROUND, JIT_FLAG_SAMPLING_JIT_BACKGROUND); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_PINVOKE_HELPERS, JIT_FLAG_USE_PINVOKE_HELPERS); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_REVERSE_PINVOKE, JIT_FLAG_REVERSE_PINVOKE); + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DESKTOP_QUIRKS, JIT_FLAG_DESKTOP_QUIRKS); + +#undef FLAGS_EQUAL + } + +#endif // COR_JIT_EE_VERSION > 460 + +private: + unsigned __int64 m_jitFlags; +}; diff --git a/src/jit/jiteh.cpp 
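The JitFlags wrapper defined above is a 64-bit bit set indexed by the JitFlag enum: Set/Clear/IsSet are single-bit shifts, and the SetFromOldFlags/SetFromFlags paths only translate between representations. A cut-down standalone approximation of that bit-set behavior (illustrative only; MiniJitFlags and the three sample flags are assumptions for this sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// Three sample flag positions, standing in for the full JitFlag enum.
enum MiniJitFlag
{
    MINI_FLAG_SPEED_OPT  = 0,
    MINI_FLAG_SIZE_OPT   = 1,
    MINI_FLAG_DEBUG_CODE = 2
};

struct MiniJitFlags
{
    uint64_t bits = 0;

    void Set(MiniJitFlag f)          { bits |= 1ULL << f; }
    void Clear(MiniJitFlag f)        { bits &= ~(1ULL << f); }
    bool IsSet(MiniJitFlag f) const  { return (bits & (1ULL << f)) != 0; }
};

int main()
{
    MiniJitFlags flags;
    flags.Set(MINI_FLAG_SPEED_OPT);
    assert(flags.IsSet(MINI_FLAG_SPEED_OPT));
    assert(!flags.IsSet(MINI_FLAG_SIZE_OPT));
    flags.Clear(MINI_FLAG_SPEED_OPT);
    assert(!flags.IsSet(MINI_FLAG_SPEED_OPT));
    return 0;
}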
b/src/jit/jiteh.cpp index b20c2f8a9a..4b3ceaecf6 100644 --- a/src/jit/jiteh.cpp +++ b/src/jit/jiteh.cpp @@ -2979,7 +2979,7 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus // Note: the flags field is kind of weird. It should be compared for equality // to determine the type of clause, even though it looks like a bitfield. In // Particular, CORINFO_EH_CLAUSE_NONE is zero, so you can "&" to check it. - // You do need to mask off the bits, though, because COR_ILEXCEPTION_CLAUSE_DUPLICATED + // You do need to mask off the bits, though, because CORINFO_EH_CLAUSE_DUPLICATE // is and'ed in. const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7; switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK) @@ -3013,15 +3013,19 @@ void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& claus } if ((clause.TryOffset == clause.TryLength) && (clause.TryOffset == clause.HandlerOffset) && - ((clause.Flags & (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)) == - (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY))) + ((clause.Flags & (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY)) == + (CORINFO_EH_CLAUSE_DUPLICATE | CORINFO_EH_CLAUSE_FINALLY))) { printf(" cloned finally"); } - else if (clause.Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED) + else if (clause.Flags & CORINFO_EH_CLAUSE_DUPLICATE) { printf(" duplicated"); } + else if (clause.Flags & CORINFO_EH_CLAUSE_SAMETRY) + { + printf(" same try"); + } printf("\n"); } diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h index b93ac3376c..3f8d8afe88 100644 --- a/src/jit/jitgcinfo.h +++ b/src/jit/jitgcinfo.h @@ -380,6 +380,9 @@ private: public: void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc); #endif // !LEGACY_BACKEND + +private: + ReturnKind getReturnKind(); }; inline unsigned char encodeUnsigned(BYTE* dest, unsigned value) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 369c96322d..ea9c573a02 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -385,8 +385,9 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo) if (simdBaseType != TYP_UNKNOWN) { assert(varTypeIsSIMD(type)); - varDsc->lvSIMDType = true; - varDsc->lvBaseType = simdBaseType; + varDsc->lvSIMDType = true; + varDsc->lvBaseType = simdBaseType; + varDsc->lvExactSize = genTypeSize(type); } } #endif // FEATURE_SIMD @@ -1448,12 +1449,16 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd, #if 1 // TODO-Cleanup: Consider removing this entire #if block in the future - // This method has two callers. The one in Importer.cpp passes sortFields == false - // and the other passes sortFields == true. - // This is a workaround that leave the inlining behavior the same and before while still - // performing extra struct promotions when compiling the method. - // +// This method has two callers. The one in Importer.cpp passes sortFields == false +// and the other passes sortFields == true. +// This is a workaround that leaves the inlining behavior the same as before while still +// performing extra struct promotions when compiling the method. +// +// The x86 legacy back-end can't handle the more general RyuJIT struct promotion (notably structs +// with holes), in genPushArgList(), so in that case always check for custom layout. 
+#if FEATURE_FIXED_OUT_ARGS || !defined(LEGACY_BACKEND) if (!sortFields) // the condition "!sortFields" really means "we are inlining" +#endif { treatAsOverlapping = StructHasCustomLayout(typeFlags); } @@ -1736,7 +1741,7 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru } } -#if !defined(_TARGET_64BIT_) +#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) //------------------------------------------------------------------------ // lvaPromoteLongVars: "Struct promote" all register candidate longs as if they are structs of two ints. // @@ -1752,29 +1757,18 @@ void Compiler::lvaPromoteLongVars() { return; } + // The lvaTable might grow as we grab temps. Make a local copy here. unsigned startLvaCount = lvaCount; for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) { LclVarDsc* varDsc = &lvaTable[lclNum]; if (!varTypeIsLong(varDsc) || varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegArgOrRet() || - (varDsc->lvRefCnt == 0)) + (varDsc->lvRefCnt == 0) || varDsc->lvIsStructField || (fgNoStructPromotion && varDsc->lvIsParam)) { continue; } - // Will this work ??? - // We can't have nested promoted structs. - if (varDsc->lvIsStructField) - { - if (lvaGetPromotionType(varDsc->lvParentLcl) != PROMOTION_TYPE_INDEPENDENT) - { - continue; - } - varDsc->lvIsStructField = false; - varDsc->lvTracked = false; - } - varDsc->lvFieldCnt = 2; varDsc->lvFieldLclStart = lvaCount; varDsc->lvPromoted = true; @@ -1823,7 +1817,7 @@ void Compiler::lvaPromoteLongVars() } #endif // DEBUG } -#endif // !_TARGET_64BIT_ +#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_) /***************************************************************************** * Given a fldOffset in a promoted struct var, return the index of the local @@ -1904,6 +1898,10 @@ void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregister JITDUMP("it is a struct\n"); assert(varTypeIsStruct(varDsc)); break; + case DNER_IsStructArg: + JITDUMP("it is a struct arg\n"); + assert(varTypeIsStruct(varDsc)); + break; case DNER_BlockOp: JITDUMP("written in a block op\n"); varDsc->lvLclBlockOpAddr = 1; @@ -2038,7 +2036,7 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool } #ifndef _TARGET_64BIT_ - bool fDoubleAlignHint = FALSE; + BOOL fDoubleAlignHint = FALSE; #ifdef _TARGET_X86_ fDoubleAlignHint = TRUE; #endif @@ -2697,6 +2695,10 @@ void Compiler::lvaSortByRefCount() lvaTrackedCount = 0; lvaTrackedCountInSizeTUnits = 0; +#ifdef DEBUG + VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeEmpty(this)); +#endif + if (lvaCount == 0) { return; @@ -3386,26 +3388,30 @@ void Compiler::lvaMarkLocalVars() #endif // !FEATURE_EH_FUNCLETS -#if FEATURE_EH_FUNCLETS - if (ehNeedsPSPSym()) + // PSPSym and LocAllocSPvar are not used by the CoreRT ABI + if (!IsTargetAbi(CORINFO_CORERT_ABI)) { - lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym")); - LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym]; - lclPSPSym->lvType = TYP_I_IMPL; - } +#if FEATURE_EH_FUNCLETS + if (ehNeedsPSPSym()) + { + lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym")); + LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym]; + lclPSPSym->lvType = TYP_I_IMPL; + } #endif // FEATURE_EH_FUNCLETS - if (compLocallocUsed) - { - lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar")); - LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar]; - locAllocSPvar->lvType = TYP_I_IMPL; + // TODO: LocAllocSPvar should be only required by the implicit frame layout expected by the VM 
on x86. + // It should be removed on other platforms once we check there are no other implicit dependencies. + if (compLocallocUsed) + { + lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar")); + LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar]; + locAllocSPvar->lvType = TYP_I_IMPL; + } } BasicBlock* block; -#if defined(DEBUGGING_SUPPORT) || defined(DEBUG) - #ifndef DEBUG // Assign slot numbers to all variables. // If compiler generated local variables, slot numbers will be @@ -3428,8 +3434,6 @@ void Compiler::lvaMarkLocalVars() } } -#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG) - /* Mark all local variable references */ lvaRefCountingStarted = true; @@ -4062,12 +4066,11 @@ void Compiler::lvaFixVirtualFrameOffsets() LclVarDsc* varDsc; #if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_) - if (ehNeedsPSPSym()) + if (lvaPSPSym != BAD_VAR_NUM) { // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space. // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between // the PSPSym and the outgoing argument space. - assert(lvaPSPSym != BAD_VAR_NUM); varDsc = &lvaTable[lvaPSPSym]; assert(varDsc->lvFramePointerBased); // We always access it RBP-relative. assert(!varDsc->lvMustInit); // It is never "must init". @@ -4453,7 +4456,9 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, noway_assert(argSize); if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R) + { argOffs -= argSize; + } unsigned fieldVarNum = BAD_VAR_NUM; @@ -4543,7 +4548,9 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, } if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) + { argOffs += argSize; + } return argOffs; } @@ -4973,13 +4980,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() #endif //_TARGET_AMD64_ #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_) - if (ehNeedsPSPSym()) + if (lvaPSPSym != BAD_VAR_NUM) { // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including // padding (so we can avoid computing the same padding in the funclet // frame). Note that there is no special padding requirement for the PSPSym. noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_) @@ -5033,7 +5039,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs); } - if (compLocallocUsed) + if (lvaLocAllocSPvar != BAD_VAR_NUM) { #ifdef JIT32_GCENCODER noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect @@ -5278,7 +5284,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // a local variable which will need stack frame space. // if (!varDsc->lvIsRegArg) + { continue; + } #ifdef _TARGET_ARM64_ if (info.compIsVarArgs) @@ -5477,13 +5485,12 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } #if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_) - if (ehNeedsPSPSym()) + if (lvaPSPSym != BAD_VAR_NUM) { // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument // space. Any padding will be higher on the stack than this // (including the padding added by lvaAlignFrame()). 
noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); } #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_) diff --git a/src/jit/legacyjit/.gitmirror b/src/jit/legacyjit/.gitmirror new file mode 100644 index 0000000000..f507630f94 --- /dev/null +++ b/src/jit/legacyjit/.gitmirror @@ -0,0 +1 @@ +Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file diff --git a/src/jit/legacyjit/CMakeLists.txt b/src/jit/legacyjit/CMakeLists.txt new file mode 100644 index 0000000000..73a4600a66 --- /dev/null +++ b/src/jit/legacyjit/CMakeLists.txt @@ -0,0 +1,62 @@ +project(legacyjit) + +add_definitions(-DLEGACY_BACKEND) +add_definitions(-DALT_JIT) +add_definitions(-DFEATURE_NO_HOST) +add_definitions(-DSELF_NO_HOST) +add_definitions(-DFEATURE_READYTORUN_COMPILER) +remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE) + +# No SIMD in legacy back-end. +remove_definitions(-DFEATURE_SIMD) +remove_definitions(-DFEATURE_AVX_SUPPORT) + +if(WIN32) + add_definitions(-DFX_VER_INTERNALNAME_STR=legacyjit.dll) +endif(WIN32) + +add_library_clr(legacyjit + SHARED + ${SHARED_LIB_SOURCES} +) + +add_dependencies(legacyjit jit_exports) + +set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) +set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) + +set(RYUJIT_LINK_LIBRARIES + utilcodestaticnohost + gcinfo +) + +if(CLR_CMAKE_PLATFORM_UNIX) + list(APPEND RYUJIT_LINK_LIBRARIES + mscorrc_debug + coreclrpal + palrt + ) +else() + list(APPEND RYUJIT_LINK_LIBRARIES + ${STATIC_MT_CRT_LIB} + ${STATIC_MT_VCRT_LIB} + kernel32.lib + advapi32.lib + ole32.lib + oleaut32.lib + uuid.lib + user32.lib + version.lib + shlwapi.lib + bcrypt.lib + crypt32.lib + RuntimeObject.lib + ) +endif(CLR_CMAKE_PLATFORM_UNIX) + +target_link_libraries(legacyjit + ${RYUJIT_LINK_LIBRARIES} +) + +# add the install targets +install_clr(legacyjit) diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp index 94206def1c..35dd1815ef 100644 --- a/src/jit/lir.cpp +++ b/src/jit/lir.cpp @@ -190,12 +190,13 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement) assert(IsDummyUse() || m_range->Contains(m_user)); assert(m_range->Contains(replacement)); - GenTree* replacedNode = *m_edge; - - *m_edge = replacement; - if (!IsDummyUse() && m_user->IsCall()) + if (!IsDummyUse()) + { + m_user->ReplaceOperand(m_edge, replacement); + } + else { - compiler->fgFixupArgTabEntryPtr(m_user, replacedNode, replacement); + *m_edge = replacement; } } @@ -256,7 +257,7 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, u assert(m_range->Contains(m_user)); assert(m_range->Contains(*m_edge)); - GenTree* node = *m_edge; + GenTree* const node = *m_edge; if (lclNum == BAD_VAR_NUM) { @@ -267,9 +268,11 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, u compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler); compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler); - GenTreeLclVar* store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar(); + GenTreeLclVar* const store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar(); + assert(store != nullptr); + assert(store->gtOp1 == node); - GenTree* load = + GenTree* const load = new (compiler, GT_LCL_VAR) GenTreeLclVar(store->TypeGet(), store->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET); m_range->InsertAfter(node, store, load); @@ -678,7 +681,7 @@ void LIR::Range::FinishInsertBefore(GenTree* insertionPoint, GenTree* first, Gen assert(m_lastNode != nullptr); assert(m_lastNode->gtNext == nullptr); m_lastNode->gtNext = first; - first->gtPrev = m_lastNode; + first->gtPrev = m_lastNode; } m_lastNode = last; } @@ -866,7 +869,7 @@ void LIR::Range::FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenT assert(m_firstNode != nullptr); assert(m_firstNode->gtPrev == nullptr); m_firstNode->gtPrev = last; - last->gtNext = 
m_firstNode; + last->gtNext = m_firstNode; } m_firstNode = first; } @@ -1157,7 +1160,6 @@ void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& r Delete(compiler, block, range.m_firstNode, range.m_lastNode); } - //------------------------------------------------------------------------ // LIR::Range::TryGetUse: Try to find the use for a given node. // @@ -1616,22 +1618,21 @@ void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range) #if DEBUG switch (block->bbJumpKind) { - case BBJ_COND: - assert(insertionPoint->OperGet() == GT_JTRUE); - break; + case BBJ_COND: + assert(insertionPoint->OperIsConditionalJump()); + break; - case BBJ_SWITCH: - assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE)); - break; + case BBJ_SWITCH: + assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE)); + break; - case BBJ_RETURN: - assert((insertionPoint->OperGet() == GT_RETURN) || - (insertionPoint->OperGet() == GT_JMP) || - (insertionPoint->OperGet() == GT_CALL)); - break; + case BBJ_RETURN: + assert((insertionPoint->OperGet() == GT_RETURN) || (insertionPoint->OperGet() == GT_JMP) || + (insertionPoint->OperGet() == GT_CALL)); + break; - default: - unreached(); + default: + unreached(); } #endif } diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index 19d326303e..423d72b9b2 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -76,7 +76,6 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar) if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar)) { /* bingo - we have an x = f(x) case */ - noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT); asgdLclVar->gtFlags |= GTF_VAR_USEDEF; rhsUSEDEF = true; } @@ -699,10 +698,6 @@ void Compiler::fgPerBlockLocalVarLiveness() } } -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/*****************************************************************************/ - // Helper functions to mark variables live over their entire scope void Compiler::fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var) @@ -1113,7 +1108,7 @@ void Compiler::fgExtendDbgLifetimes() // Create initialization node if (!block->IsLIR()) { - GenTree* varNode = gtNewLclvNode(varNum, type); + GenTree* varNode = gtNewLclvNode(varNum, type); GenTree* initNode = gtNewAssignNode(varNode, zero); // Create a statement for the initializer, sequence it, and append it to the current BB. 
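The FinishInsertBefore/FinishInsertAfter helpers touched above are the link-maintenance half of LIR range insertion: whichever end of the range is touched, both gtPrev and gtNext must be patched and the range's first/last pointers updated. The same bookkeeping in a self-contained form (illustrative only; Node, Range, and Append are stand-ins for this sketch, not CoreCLR types):

#include <cassert>
#include <cstddef>

struct Node
{
    Node* prev = nullptr;
    Node* next = nullptr;
};

struct Range
{
    Node* first = nullptr;
    Node* last  = nullptr;

    // Append 'node' at the end of the range, fixing up both link directions,
    // analogous to the "insert at the end" path of the helpers above.
    void Append(Node* node)
    {
        node->prev = last;
        node->next = nullptr;
        if (last != nullptr)
        {
            last->next = node;
        }
        else
        {
            first = node;
        }
        last = node;
    }
};

int main()
{
    Range range;
    Node a, b;
    range.Append(&a);
    range.Append(&b);
    assert(range.first == &a && range.last == &b);
    assert(a.next == &b && b.prev == &a);
    return 0;
}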
@@ -1124,7 +1119,8 @@ void Compiler::fgExtendDbgLifetimes() } else { - GenTree* store = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET); + GenTree* store = + new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET); store->gtOp.gtOp1 = zero; store->gtFlags |= (GTF_VAR_DEF | GTF_ASG); @@ -1133,7 +1129,7 @@ void Compiler::fgExtendDbgLifetimes() #if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND) DecomposeLongs::DecomposeRange(this, blockWeight, initRange); -#endif +#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND) // Naively inserting the initializer at the end of the block may add code after the block's // terminator, in which case the inserted code will never be executed (and the IR for the @@ -1184,10 +1180,6 @@ void Compiler::fgExtendDbgLifetimes() #endif // DEBUG } -/*****************************************************************************/ -#endif // DEBUGGING_SUPPORT -/*****************************************************************************/ - VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block) { noway_assert(block); @@ -1905,9 +1897,7 @@ VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg, VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification. VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars); -#ifdef DEBUGGING_SUPPORT VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); // Don't kill vars in scope -#endif noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars)); noway_assert(compCurStmt->gtOper == GT_STMT); @@ -1955,9 +1945,7 @@ VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification. 
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars); -#ifdef DEBUGGING_SUPPORT VarSetOps::UnionD(this, keepAliveVars, block->bbScope); // Don't kill vars in scope -#endif noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars)); @@ -1980,9 +1968,9 @@ VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock else if (node->OperIsNonPhiLocal() || node->OperIsLocalAddr()) { bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node, node); - if (isDeadStore) + if (isDeadStore && fgTryRemoveDeadLIRStore(blockRange, node, &next)) { - fgTryRemoveDeadLIRStore(blockRange, node, &next); + fgStmtRemoved = true; } } } @@ -2018,9 +2006,8 @@ VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg, GenTreePtr gtColon = NULL; VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars); -#ifdef DEBUGGING_SUPPORT VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); /* Dont kill vars in scope */ -#endif + noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars)); noway_assert(compCurStmt->gtOper == GT_STMT); noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr)); @@ -2548,10 +2535,10 @@ bool Compiler::fgRemoveDeadStore( switch (asgNode->gtOper) { case GT_ASG_ADD: - asgNode->gtOper = GT_ADD; + asgNode->SetOperRaw(GT_ADD); break; case GT_ASG_SUB: - asgNode->gtOper = GT_SUB; + asgNode->SetOperRaw(GT_SUB); break; default: // Only add and sub allowed, we don't have ASG_MUL and ASG_DIV for ints, and @@ -2854,10 +2841,6 @@ void Compiler::fgInterBlockLocalVarLiveness() fgLiveVarAnalysis(); -//------------------------------------------------------------------------- - -#ifdef DEBUGGING_SUPPORT - /* For debuggable code, we mark vars as live over their entire * reported scope, so that it will be visible over the entire scope */ @@ -2867,8 +2850,6 @@ void Compiler::fgInterBlockLocalVarLiveness() fgExtendDbgLifetimes(); } -#endif // DEBUGGING_SUPPORT - /*------------------------------------------------------------------------- * Variables involved in exception-handlers and finally blocks need * to be specially marked diff --git a/src/jit/loopcloning.cpp b/src/jit/loopcloning.cpp index 8ce015e607..a1ba14292a 100644 --- a/src/jit/loopcloning.cpp +++ b/src/jit/loopcloning.cpp @@ -698,7 +698,7 @@ void LoopCloneContext::CondToStmtInBlock(Compiler* comp, comp->fgInsertStmtAtEnd(block, stmt); // Remorph. - comp->fgMorphBlockStmt(block, stmt DEBUGARG("Loop cloning condition")); + comp->fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Loop cloning condition")); } //-------------------------------------------------------------------------------------------------- diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 09eb9146ac..a6e50b304c 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -135,6 +135,15 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerCall(node); break; + case GT_LT: + case GT_LE: + case GT_GT: + case GT_GE: + case GT_EQ: + case GT_NE: + LowerCompare(node); + break; + case GT_JMP: LowerJmpMethod(node); break; @@ -169,13 +178,33 @@ GenTree* Lowering::LowerNode(GenTree* node) // produces a TYP_SIMD16 result node->gtType = TYP_SIMD16; } + +#ifdef _TARGET_XARCH_ + if ((node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicGetItem) && (node->gtGetOp1()->OperGet() == GT_IND)) + { + // If SIMD vector is already in memory, we force its + // addr to be evaluated into a reg. 
This would allow + // us to generate [regBase] or [regBase+offset] or + // [regBase+sizeOf(SIMD vector baseType)*regIndex] + // to access the required SIMD vector element directly + // from memory. + // + // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we + // might be able update GT_LEA to fold the regIndex + // or offset in some cases. Instead with this + // approach we always evaluate GT_LEA into a reg. + // Ideally, we should be able to lower GetItem intrinsic + // into GT_IND(newAddr) where newAddr combines + // the addr of SIMD vector with the given index. + node->gtOp.gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; + } +#endif break; case GT_LCL_VAR: case GT_STORE_LCL_VAR: if (node->TypeGet() == TYP_SIMD12) { -#ifdef _TARGET_64BIT_ // Assumption 1: // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for @@ -198,10 +227,29 @@ GenTree* Lowering::LowerNode(GenTree* node) // Vector3 return values are returned two return registers and Caller assembles them into a // single xmm reg. Hence RyuJIT explicitly generates code to clears upper 4-bytes of Vector3 // type args in prolog and Vector3 type return value of a call + // + // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments + // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed + // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear + // it either. + + unsigned varNum = node->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &comp->lvaTable[varNum]; + +#if defined(_TARGET_64BIT_) + assert(varDsc->lvSize() == 16); node->gtType = TYP_SIMD16; -#else - NYI("Lowering of TYP_SIMD12 locals"); -#endif // _TARGET_64BIT_ +#else // !_TARGET_64BIT_ + if (varDsc->lvSize() == 16) + { + node->gtType = TYP_SIMD16; + } + else + { + // The following assert is guaranteed by lvSize(). + assert(varDsc->lvIsParam); + } +#endif // !_TARGET_64BIT_ } #endif // FEATURE_SIMD __fallthrough; @@ -215,7 +263,7 @@ GenTree* Lowering::LowerNode(GenTree* node) #if FEATURE_MULTIREG_RET GenTree* src = node->gtGetOp1(); assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal()); -#else // !FEATURE_MULTIREG_RET +#else // !FEATURE_MULTIREG_RET assert(!"Unexpected struct local store in Lowering"); #endif // !FEATURE_MULTIREG_RET } @@ -680,7 +728,7 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo // Arguments: // call - the call whose arg is being rewritten. // arg - the arg being rewritten. -// info - the ArgTabEntry information for the argument. +// info - the fgArgTabEntry information for the argument. // type - the type of the argument. // // Return Value: @@ -692,11 +740,11 @@ void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCo // // Notes: // For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) -// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_LIST of two GT_PUTARG_REGs +// this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs // for two eightbyte structs. // // For STK passed structs the method generates GT_PUTARG_STK tree. For System V systems with native struct passing -// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GP pointers count and the pointers +// (i.e. 
FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointers count and the pointers // layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value. // (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.) // @@ -753,8 +801,8 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP // In this case a new tree is created that is GT_PUTARG_REG // with a op1 the original argument. // 2. The struct is contained in 2 eightbytes: - // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct. - // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST + // in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs - the two eightbytes of the struct. + // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1. assert(info->structDesc.eightByteCount != 0); @@ -826,25 +874,25 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP // // clang-format on - assert(arg->OperGet() == GT_LIST); + assert(arg->OperGet() == GT_FIELD_LIST); - GenTreeArgList* argListPtr = arg->AsArgList(); - assert(argListPtr->IsAggregate()); + GenTreeFieldList* fieldListPtr = arg->AsFieldList(); + assert(fieldListPtr->IsFieldListHead()); - for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++) + for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) { // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD. GenTreePtr newOper = comp->gtNewOperNode( GT_PUTARG_REG, comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr], info->structDesc.eightByteSizes[ctr]), - argListPtr->gtOp.gtOp1); + fieldListPtr->gtOp.gtOp1); - // Splice in the new GT_PUTARG_REG node in the GT_LIST - ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper); + // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST + ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); } - // Just return arg. The GT_LIST is not replaced. + // Just return arg. The GT_FIELD_LIST is not replaced. // Nothing more to do. 
return arg; } @@ -857,26 +905,26 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP else #else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) #if FEATURE_MULTIREG_ARGS - if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST)) + if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST)) { - assert(arg->OperGet() == GT_LIST); + assert(arg->OperGet() == GT_FIELD_LIST); - GenTreeArgList* argListPtr = arg->AsArgList(); - assert(argListPtr->IsAggregate()); + GenTreeFieldList* fieldListPtr = arg->AsFieldList(); + assert(fieldListPtr->IsFieldListHead()); - for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++) + for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++) { - GenTreePtr curOp = argListPtr->gtOp.gtOp1; + GenTreePtr curOp = fieldListPtr->gtOp.gtOp1; var_types curTyp = curOp->TypeGet(); // Create a new GT_PUTARG_REG node with op1 GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp); - // Splice in the new GT_PUTARG_REG node in the GT_LIST - ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper); + // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST + ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper); } - // Just return arg. The GT_LIST is not replaced. + // Just return arg. The GT_FIELD_LIST is not replaced. // Nothing more to do. return arg; } @@ -893,23 +941,20 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP // This provides the info to put this argument in in-coming arg area slot // instead of in out-going arg area slot. - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is - // correct + PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is + // correct #if FEATURE_FASTTAILCALL putArg = new (comp, GT_PUTARG_STK) - GenTreePutArgStk(GT_PUTARG_STK, type, arg, - info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct), + GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), call->IsFastTailCall() DEBUGARG(call)); #else putArg = new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, type, arg, - info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots) - FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call)); + info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call)); #endif -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK // If the ArgTabEntry indicates that this arg is a struct // get and store the number of slots that are references. // This is later used in the codegen for PUT_ARG_STK implementation @@ -919,8 +964,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP // pair copying using XMM registers or rep mov instructions. if (info->isStruct) { - unsigned numRefs = 0; - BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; // We use GT_OBJ for non-SIMD struct arguments. However, for // SIMD arguments the GT_OBJ has already been transformed. 
if (arg->gtOper != GT_OBJ) @@ -929,13 +972,14 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP } else { + unsigned numRefs = 0; + BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots]; assert(!varTypeIsSIMD(arg)); numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout); + putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); } - - putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout); } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK } if (arg->InReg()) @@ -1011,6 +1055,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) type = TYP_INT; } +#if defined(FEATURE_SIMD) && defined(_TARGET_X86_) + // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their + // allocated size (see lvSize()). However, when passing the variables as arguments, and + // storing the variables to the outgoing argument area on the stack, we must use their + // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written. + if (type == TYP_SIMD16) + { + if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR)) + { + unsigned varNum = arg->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = &comp->lvaTable[varNum]; + type = varDsc->lvType; + } + } +#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_) + GenTreePtr putArg; // If we hit this we are probably double-lowering. @@ -1068,7 +1128,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) putArg = NewPutArg(call, arg, info, type); // In the case of register passable struct (in one or two registers) - // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.) + // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.) // If an extra node is returned, splice it in the right place in the tree. if (arg != putArg) { @@ -1367,6 +1427,7 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) // Inserts profiler hook, GT_PROF_HOOK for a tail call node. // +// AMD64: // We need to insert this after all nested calls, but before all the arguments to this call have been set up. // To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before // that. If there are no args, then it should be inserted before the call node. @@ -1391,16 +1452,30 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) // In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call // (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call. // +// X86: +// Insert the profiler hook immediately before the call. The profiler hook will preserve +// all argument registers (ECX, EDX), but nothing else. +// // Params: // callNode - tail call node -// insertionPoint - if caller has an insertion point; If null -// profiler hook is inserted before args are setup +// insertionPoint - if non-null, insert the profiler hook before this point. +// If null, insert the profiler hook before args are setup // but after all arg side effects are computed. 
+// void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint) { assert(call->IsTailCall()); assert(comp->compIsProfilerHookNeeded()); +#if defined(_TARGET_X86_) + + if (insertionPoint == nullptr) + { + insertionPoint = call; + } + +#else // !defined(_TARGET_X86_) + if (insertionPoint == nullptr) { GenTreePtr tmp = nullptr; @@ -1437,6 +1512,8 @@ void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint } } +#endif // !defined(_TARGET_X86_) + assert(insertionPoint != nullptr); GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID); BlockRange().InsertBefore(insertionPoint, profHookNode); @@ -1705,7 +1782,10 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check assert(!call->IsUnmanaged()); // tail calls to unamanaged methods assert(!comp->compLocallocUsed); // tail call from methods that also do localloc - assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check + +#ifdef _TARGET_AMD64_ + assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check +#endif // _TARGET_AMD64_ // We expect to see a call that meets the following conditions assert(call->IsTailCallViaHelper()); @@ -1713,8 +1793,9 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget // The TailCall helper call never returns to the caller and is not GC interruptible. // Therefore the block containing the tail call should be a GC safe point to avoid - // GC starvation. - assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT); + // GC starvation. It is legal for the block to be unmarked iff the entry block is a + // GC safe point, as the entry block trivially dominates every reachable block. + assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT)); // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that // a method returns. This is a case of caller method has both PInvokes and tail calls. @@ -1839,16 +1920,268 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget // Now add back tail call flags for identifying this node as tail call dispatched via helper. call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER; +#ifdef PROFILING_SUPPORTED // Insert profiler tail call hook if needed. // Since we don't know the insertion point, pass null for second param. if (comp->compIsProfilerHookNeeded()) { InsertProfTailCallHook(call, nullptr); } +#endif // PROFILING_SUPPORTED + + assert(call->IsTailCallViaHelper()); return result; } +//------------------------------------------------------------------------ +// Lowering::LowerCompare: lowers a compare node. +// +// For 64-bit targets, this doesn't do much of anything: all comparisons +// that we support can be handled in code generation on such targets. +// +// For 32-bit targets, however, any comparison that feeds a `GT_JTRUE` +// node must be lowered such that the liveness of the operands to the +// comparison is properly visible to the rest of the backend. 
As such, +// a 64-bit comparison is lowered from something like this: +// +// ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02} +// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148 +// +// /--* t6 int +// N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0 +// +// N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101 +// +// /--* t7 long +// +--* t9 long +// N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149 +// +// /--* t10 int +// N005 ( 11, 19) [000011] ------------ * jmpTrue void +// +// To something like this: +// +// ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03} +// [000099] ------------ t99 = const int 0 +// +// [000101] ------------ t101 = const int 0 +// +// /--* t99 int +// +--* t101 int +// N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149 +// +// /--* t10 int +// N005 ( 11, 19) [000011] ------------ * jmpTrue void +// +// +// ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02} +// [000105] -------N-U-- jcc void cond=< +// +// +// ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02} +// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148 +// +// N003 ( 3, 10) [000009] ------------ t9 = const int 3 +// +// /--* t6 int +// +--* t9 int +// [000106] N------N-U-- t106 = * < int +// +// /--* t106 int +// [000107] ------------ * jmpTrue void +// +// Which will eventually generate code similar to the following: +// +// 33DB xor ebx, ebx +// 85DB test ebx, ebx +// 7707 ja SHORT G_M50523_IG04 +// 72E7 jb SHORT G_M50523_IG03 +// 83F803 cmp eax, 3 +// 72E2 jb SHORT G_M50523_IG03 +// +void Lowering::LowerCompare(GenTree* cmp) +{ +#ifndef _TARGET_64BIT_ + if (cmp->gtGetOp1()->TypeGet() != TYP_LONG) + { + return; + } + + LIR::Use cmpUse; + + if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE) + { + return; + } + + GenTree* src1 = cmp->gtGetOp1(); + GenTree* src2 = cmp->gtGetOp2(); + unsigned weight = m_block->getBBWeight(comp); + + LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1); + LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2); + + if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR) + { + loSrc1.ReplaceWithLclVar(comp, weight); + } + + if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR) + { + loSrc2.ReplaceWithLclVar(comp, weight); + } + + BasicBlock* jumpDest = m_block->bbJumpDest; + BasicBlock* nextDest = m_block->bbNext; + BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block); + + cmp->gtType = TYP_INT; + cmp->gtOp.gtOp1 = src1->gtOp.gtOp2; + cmp->gtOp.gtOp2 = src2->gtOp.gtOp2; + + if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE) + { + // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32 + // bits and one for the lower 32 bits. 
As such, we update the flow graph like so: + // + // Before: + // BB0: cond + // / \ + // false true + // | | + // BB1 BB2 + // + // After: + // BB0: cond(hi) + // / \ + // false true + // | | + // | BB3: cond(lo) + // | / \ + // | false true + // \ / | + // BB1 BB2 + // + + BlockRange().Remove(loSrc1.Def()); + BlockRange().Remove(loSrc2.Def()); + GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def()); + loCmp->gtFlags = cmp->gtFlags; + GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp); + LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue); + + m_block->bbJumpKind = BBJ_COND; + + if (cmp->OperGet() == GT_EQ) + { + cmp->gtOper = GT_NE; + m_block->bbJumpDest = nextDest; + nextDest->bbFlags |= BBF_JMP_TARGET; + comp->fgAddRefPred(nextDest, m_block); + } + else + { + m_block->bbJumpDest = jumpDest; + comp->fgAddRefPred(jumpDest, m_block); + } + + assert(newBlock->bbJumpKind == BBJ_COND); + assert(newBlock->bbJumpDest == jumpDest); + } + else + { + // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one + // comparison for the lower 32 bits. We update the flowgraph as such: + // + // Before: + // BB0: cond + // / \ + // false true + // | | + // BB1 BB2 + // + // After: + // BB0: (!cond(hi) && !eq(hi)) + // / \ + // true false + // | | + // | BB3: (cond(hi) && !eq(hi)) + // | / \ + // | false true + // | | | + // | BB4: cond(lo) | + // | / \ | + // | false true | + // \ / \ / + // BB1 BB2 + // + // + // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))" + // differ based on the original condition, and all consist of a single node. The switch statement below + // performs the necessary mapping. + // + + genTreeOps hiCmpOper; + genTreeOps loCmpOper; + + switch (cmp->OperGet()) + { + case GT_LT: + cmp->gtOper = GT_GT; + hiCmpOper = GT_LT; + loCmpOper = GT_LT; + break; + case GT_LE: + cmp->gtOper = GT_GT; + hiCmpOper = GT_LT; + loCmpOper = GT_LE; + break; + case GT_GT: + cmp->gtOper = GT_LT; + hiCmpOper = GT_GT; + loCmpOper = GT_GT; + break; + case GT_GE: + cmp->gtOper = GT_LT; + hiCmpOper = GT_GT; + loCmpOper = GT_GE; + break; + default: + unreached(); + } + + BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock); + + GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper); + hiJcc->gtFlags = cmp->gtFlags; + LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc); + + BlockRange().Remove(loSrc1.Def()); + BlockRange().Remove(loSrc2.Def()); + GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def()); + loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED; + GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp); + LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue); + + m_block->bbJumpKind = BBJ_COND; + m_block->bbJumpDest = nextDest; + nextDest->bbFlags |= BBF_JMP_TARGET; + comp->fgAddRefPred(nextDest, m_block); + + newBlock->bbJumpKind = BBJ_COND; + newBlock->bbJumpDest = jumpDest; + comp->fgAddRefPred(jumpDest, newBlock); + + assert(newBlock2->bbJumpKind == BBJ_COND); + assert(newBlock2->bbJumpDest == jumpDest); + } + + BlockRange().Remove(src1); + BlockRange().Remove(src2); +#endif +} + // Lower "jmp <method>" tail call to insert PInvoke method epilog if required. 
void Lowering::LowerJmpMethod(GenTree* jmp) { @@ -2334,8 +2667,12 @@ void Lowering::InsertPInvokeMethodProlog() DISPTREERANGE(firstBlockRange, storeFP); // -------------------------------------------------------- + // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto + // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame. + CLANG_FORMAT_COMMENT_ANCHOR; - if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB) +#ifdef _TARGET_64BIT_ + if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { // Push a frame - if we are NOT in an IL stub, this is done right before the call // The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack @@ -2343,6 +2680,7 @@ void Lowering::InsertPInvokeMethodProlog() firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); DISPTREERANGE(firstBlockRange, frameUpd); } +#endif // _TARGET_64BIT_ } //------------------------------------------------------------------------ @@ -2405,9 +2743,14 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePt GenTree* storeGCState = SetGCState(1); returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState)); - if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB) + // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do + // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_64BIT_ + if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) +#endif // _TARGET_64BIT_ { - // Pop the frame, in non-stubs we do this around each PInvoke call GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); } @@ -2454,6 +2797,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) comp->fgMorphTree(helperCall); BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall)); + LowerNode(helperCall); // helper call is inserted before current node and should be lowered here. return; } #endif @@ -2464,7 +2808,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) // InlinedCallFrame.m_pCallSiteSP = SP // x86 only // InlinedCallFrame.m_pCallerReturnAddress = return address // Thread.gcState = 0 - // (non-stub) - update top Frame on TCB + // (non-stub) - update top Frame on TCB // 64-bit targets only // ---------------------------------------------------------------------------------- // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it). @@ -2474,11 +2818,19 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) if (callType == CT_INDIRECT) { +#if !defined(_TARGET_64BIT_) + // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum. + const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE; + + src = comp->gtNewIconNode(numStkArgBytes, TYP_INT); +#else + // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum. + // If the stub parameter value is not needed, m_Datum will be initialized by the VM. 
if (comp->info.compPublishStubParam) { - src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET); + src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL); } - // else { If we don't have secret parameter, m_Datum will be initialized by VM code } +#endif // !defined(_TARGET_64BIT_) } else { @@ -2542,7 +2894,12 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab)); - if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB)) + // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method + // contains PInvokes; on 64-bit targets this is necessary in non-stubs. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_64BIT_ + if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { // Set the TCB's frame to be the one we just created. // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME) @@ -2552,6 +2909,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd)); } +#endif // _TARGET_64BIT_ // IMPORTANT **** This instruction must come last!!! **** // It changes the thread's state to Preemptive mode @@ -2583,7 +2941,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) // First argument is the address of the frame variable. GenTree* frameAddr = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET); - frameAddr->gtOper = GT_LCL_VAR_ADDR; + frameAddr->SetOperRaw(GT_LCL_VAR_ADDR); // Insert call to CORINFO_HELP_JIT_PINVOKE_END GenTree* helperCall = @@ -2604,12 +2962,32 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) tree = CreateReturnTrapSeq(); BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); - // Pop the frame if necessasry - if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB)) + // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this + // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef _TARGET_64BIT_ + if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { tree = CreateFrameLinkUpdate(PopFrame); BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree)); } +#else + const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo; + + // ---------------------------------------------------------------------------------- + // InlinedCallFrame.m_pCallerReturnAddress = nullptr + + GenTreeLclFld* const storeCallSiteTracker = + new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, + callFrameInfo.offsetOfReturnAddress); + + GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0); + + storeCallSiteTracker->gtOp1 = constantZero; + + BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker); +#endif // _TARGET_64BIT_ } //------------------------------------------------------------------------ @@ -2624,7 +3002,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call) { // PInvoke lowering varies depending on the flags passed in by the EE.
By default, - // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified, + // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified, // GC transitions are instead performed using helper calls. Examples of each case are given // below. Note that the data structure that is used to store information about a call frame // containing any P/Invoke calls is initialized in the method prolog (see @@ -2697,7 +3075,7 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call) #if COR_JIT_EE_VERSION > 460 comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup); #else - void* pIndirection; + void* pIndirection; lookup.accessType = IAT_PVALUE; lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection); if (lookup.addr == nullptr) @@ -2866,14 +3244,10 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) } #endif - // TODO-Cleanup: Disable emitting random NOPs - // This is code to set up an indirect call to a stub address computed // via dictionary lookup. if (call->gtCallType == CT_INDIRECT) { - NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup"); - // The importer decided we needed a stub call via a computed // stub dispatch address, i.e. an address which came from a dictionary lookup. // - The dictionary lookup produces an indirected address, suitable for call @@ -2886,6 +3260,8 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // All we have to do here is add an indirection to generate the actual call target. GenTree* ind = Ind(call->gtCallAddr); + ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; + BlockRange().InsertAfter(call->gtCallAddr, ind); call->gtCallAddr = ind; } @@ -2923,8 +3299,10 @@ GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call) // So we don't use a register. #ifndef _TARGET_X86_ // on x64 we must materialize the target using specific registers. - addr->gtRegNum = REG_VIRTUAL_STUB_PARAM; + addr->gtRegNum = REG_VIRTUAL_STUB_PARAM; + indir->gtRegNum = REG_JUMP_THUNK_PARAM; + indir->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; #endif result = indir; } @@ -3042,8 +3420,6 @@ bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, Ge return true; } } - - unreached(); } //------------------------------------------------------------------------ @@ -3082,9 +3458,9 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir) { // We can have an indirection on the rhs of a block copy (it is the source // object). This is not a "regular" indirection. - // (Note that the parent check could be costly.) - GenTree* parent = indir->gtGetParent(nullptr); - if ((parent != nullptr) && parent->OperIsIndir()) + // (Note that the user check could be costly.) 
+ LIR::Use indirUse; + if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir()) { isIndir = false; } @@ -3248,9 +3624,14 @@ void Lowering::LowerUnsignedDivOrMod(GenTree* node) { assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD)); - GenTree* divisor = node->gtGetOp2(); + GenTree* divisor = node->gtGetOp2(); + GenTree* dividend = node->gtGetOp1(); - if (divisor->IsCnsIntOrI()) + if (divisor->IsCnsIntOrI() +#ifdef _TARGET_X86_ + && (dividend->OperGet() != GT_LONG) +#endif + ) { size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue()); @@ -3276,6 +3657,91 @@ void Lowering::LowerUnsignedDivOrMod(GenTree* node) } //------------------------------------------------------------------------ +// GetSignedMagicNumberForDivide: Generates a magic number and shift amount for +// the magic number division optimization. +// +// Arguments: +// denom - The denominator +// shift - Pointer to the shift value to be returned +// +// Returns: +// The magic number. +// +// Notes: +// This code is previously from UTC where it notes it was taken from +// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58. The paper it is based on +// is "Division by invariant integers using multiplication" by Torbjorn Granlund +// and Peter L. Montgomery in PLDI 94 + +template <typename T> +T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/) +{ + // static SMAG smag; + const int bits = sizeof(T) * 8; + const int bits_minus_1 = bits - 1; + + typedef typename jitstd::make_unsigned<T>::type UT; + + const UT two_nminus1 = UT(1) << bits_minus_1; + + int p; + UT absDenom; + UT absNc; + UT delta; + UT q1; + UT r1; + UT r2; + UT q2; + UT t; + T result_magic; + int result_shift; + int iters = 0; + + absDenom = abs(denom); + t = two_nminus1 + ((unsigned int)denom >> 31); + absNc = t - 1 - (t % absDenom); // absolute value of nc + p = bits_minus_1; // initialize p + q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc) + r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc)) + q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom) + r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom)) + + do + { + iters++; + p++; + q1 *= 2; // update q1 = 2^p / abs(nc) + r1 *= 2; // update r1 = rem(2^p / abs(nc)) + + if (r1 >= absNc) + { // must be unsigned comparison + q1++; + r1 -= absNc; + } + + q2 *= 2; // update q2 = 2^p / abs(denom) + r2 *= 2; // update r2 = rem(2^p / abs(denom)) + + if (r2 >= absDenom) + { // must be unsigned comparison + q2++; + r2 -= absDenom; + } + + delta = absDenom - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + result_magic = q2 + 1; // resulting magic number + if (denom < 0) + { + result_magic = -result_magic; + } + *shift = p - bits; // resulting shift + + return result_magic; +} + +//------------------------------------------------------------------------ // LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2 // const divisor into equivalent but faster sequences. // @@ -3313,8 +3779,10 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) ssize_t divisorValue = divisor->gtIntCon.IconValue(); - if (divisorValue == -1) + if (divisorValue == -1 || divisorValue == 0) { + // x / 0 and x % 0 can't be optimized because they are required to throw an exception. + // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
// x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is @@ -3343,14 +3811,122 @@ GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node) if (!isPow2(absDivisorValue)) { +#ifdef _TARGET_XARCH_ + ssize_t magic; + int shift; + + if (type == TYP_INT) + { + magic = GetSignedMagicNumberForDivide<int32_t>(static_cast<int32_t>(divisorValue), &shift); + } + else + { +#ifdef _TARGET_64BIT_ + magic = GetSignedMagicNumberForDivide<int64_t>(static_cast<int64_t>(divisorValue), &shift); +#else + unreached(); +#endif + } + + divisor->gtIntConCommon.SetIconValue(magic); + + // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node. + // The existing node will later be transformed into a GT_ADD/GT_SUB that + // computes the final result. This way don't need to find and change the + // use of the existing node. + GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend); + BlockRange().InsertBefore(divMod, mulhi); + + // mulhi was the easy part. Now we need to generate different code depending + // on the divisor value: + // For 3 we need: + // div = signbit(mulhi) + mulhi + // For 5 we need: + // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust + // For 7 we need: + // mulhi += dividend ; requires add adjust + // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust + // For -3 we need: + // mulhi -= dividend ; requires sub adjust + // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust + bool requiresAddSubAdjust = signum(divisorValue) != signum(magic); + bool requiresShiftAdjust = shift != 0; + bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv; + unsigned curBBWeight = comp->compCurBB->getBBWeight(comp); + unsigned dividendLclNum = BAD_VAR_NUM; + + if (requiresDividendMultiuse) + { + LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi); + dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight); + } + + GenTree* adjusted; + + if (requiresAddSubAdjust) + { + dividend = comp->gtNewLclvNode(dividendLclNum, type); + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + + adjusted = comp->gtNewOperNode(divisorValue > 0 ? 
GT_ADD : GT_SUB, type, mulhi, dividend); + BlockRange().InsertBefore(divMod, dividend, adjusted); + } + else + { + adjusted = mulhi; + } + + GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type); + GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy); + BlockRange().InsertBefore(divMod, shiftBy, signBit); + + LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit); + unsigned adjustedLclNum = adjustedUse.ReplaceWithLclVar(comp, curBBWeight); + adjusted = comp->gtNewLclvNode(adjustedLclNum, type); + comp->lvaTable[adjustedLclNum].incRefCnts(curBBWeight, comp); + BlockRange().InsertBefore(divMod, adjusted); + + if (requiresShiftAdjust) + { + shiftBy = comp->gtNewIconNode(shift, TYP_INT); + adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy); + BlockRange().InsertBefore(divMod, shiftBy, adjusted); + } + + if (isDiv) + { + divMod->SetOperRaw(GT_ADD); + divMod->gtOp.gtOp1 = adjusted; + divMod->gtOp.gtOp2 = signBit; + } + else + { + GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit); + + dividend = comp->gtNewLclvNode(dividendLclNum, type); + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + + // dividend % divisor = dividend - divisor * div + GenTree* divisor = comp->gtNewIconNode(divisorValue, type); + GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); + BlockRange().InsertBefore(divMod, dividend, div, divisor, mul); + + divMod->SetOperRaw(GT_SUB); + divMod->gtOp.gtOp1 = dividend; + divMod->gtOp.gtOp2 = mul; + } + + return mulhi; +#else + // Currently there's no GT_MULHI for ARM32/64 return next; +#endif } - // We're committed to the conversion now. Go find the use. + // We're committed to the conversion now. Go find the use if any. LIR::Use use; if (!BlockRange().TryGetUse(node, &use)) { - assert(!"signed DIV/MOD node is unused"); return next; } @@ -3450,8 +4026,6 @@ void Lowering::LowerStoreInd(GenTree* node) void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { GenTree* src = blkNode->Data(); - // TODO-1stClassStructs: Don't require this.
- assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal()); TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false); } @@ -3817,17 +4391,17 @@ void Lowering::CheckCallArg(GenTree* arg) break; #endif - case GT_LIST: - { - GenTreeArgList* list = arg->AsArgList(); - assert(list->IsAggregate()); + case GT_FIELD_LIST: + { + GenTreeFieldList* list = arg->AsFieldList(); + assert(list->IsFieldListHead()); - for (; list != nullptr; list = list->Rest()) - { - assert(list->Current()->OperIsPutArg()); - } + for (; list != nullptr; list = list->Rest()) + { + assert(list->Current()->OperIsPutArg()); } - break; + } + break; default: assert(arg->OperIsPutArg()); diff --git a/src/jit/lower.h b/src/jit/lower.h index 620636d8bd..c1cafb4ee8 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -65,6 +65,7 @@ private: // Call Lowering // ------------------------------ void LowerCall(GenTree* call); + void LowerCompare(GenTree* tree); void LowerJmpMethod(GenTree* jmp); void LowerRet(GenTree* ret); GenTree* LowerDelegateInvoke(GenTreeCall* call); @@ -127,8 +128,14 @@ private: // return true if this call target is within range of a pc-rel call on the machine bool IsCallTargetInRange(void* addr); +#ifdef _TARGET_X86_ + bool ExcludeNonByteableRegisters(GenTree* tree); +#endif + void TreeNodeInfoInit(GenTree* stmt); + void TreeNodeInfoInitCheckByteable(GenTree* tree); + #if defined(_TARGET_XARCH_) void TreeNodeInfoInitSimple(GenTree* tree); @@ -190,6 +197,7 @@ private: void TreeNodeInfoInitReturn(GenTree* tree); void TreeNodeInfoInitShiftRotate(GenTree* tree); void TreeNodeInfoInitCall(GenTreeCall* call); + void TreeNodeInfoInitCmp(GenTreePtr tree); void TreeNodeInfoInitStructArg(GenTreePtr structArg); void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode); void TreeNodeInfoInitLogicalOp(GenTree* tree); @@ -200,11 +208,11 @@ private: #endif // FEATURE_SIMD void TreeNodeInfoInitCast(GenTree* tree); #ifdef _TARGET_ARM64_ - void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info); + void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info); #endif // _TARGET_ARM64_ -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - void TreeNodeInfoInitPutArgStk(GenTree* tree); -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK + void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree); +#endif // FEATURE_PUT_STRUCT_ARG_STK void TreeNodeInfoInitLclHeap(GenTree* tree); void DumpNodeInfoMap(); @@ -226,8 +234,6 @@ private: void SetMulOpCounts(GenTreePtr tree); #endif // defined(_TARGET_XARCH_) - void LowerCmp(GenTreePtr tree); - #if !CPU_LOAD_STORE_ARCH bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd); bool IsBinOpInRMWStoreInd(GenTreePtr tree); diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp index 67cea2ff4e..5bf23c4199 100644 --- a/src/jit/lowerarm.cpp +++ b/src/jit/lowerarm.cpp @@ -32,10 +32,76 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" #include "lsra.h" -/* Lowering of GT_CAST nodes */ +//------------------------------------------------------------------------ +// LowerCast: Lower GT_CAST(srcType, DstType) nodes. +// +// Arguments: +// tree - GT_CAST node to be lowered +// +// Return Value: +// None. 
+// +// Notes: +// Casts from small int type to float/double are transformed as follows: +// GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double) +// GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double) +// GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double) +// GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double) +// +// Similarly casts from float/double to a smaller int type are transformed as follows: +// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte) +// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte) +// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16) +// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16) +// +// Note that for the overflow conversions we still depend on helper calls and +// don't expect to see them here. +// i) GT_CAST(float/double, int type with overflow detection) + void Lowering::LowerCast(GenTree* tree) { - NYI_ARM("ARM Lowering for cast"); + assert(tree->OperGet() == GT_CAST); + + JITDUMP("LowerCast for: "); + DISPNODE(tree); + JITDUMP("\n"); + + GenTreePtr op1 = tree->gtOp.gtOp1; + var_types dstType = tree->CastToType(); + var_types srcType = op1->TypeGet(); + var_types tmpType = TYP_UNDEF; + + // TODO-ARM-Cleanup: Remove following NYI assertions. + if (varTypeIsFloating(srcType)) + { + NYI_ARM("Lowering for cast from float"); // Not tested yet. + noway_assert(!tree->gtOverflow()); + } + + // Case of src is a small type and dst is a floating point type. + if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType)) + { + NYI_ARM("Lowering for cast from small type to float"); // Not tested yet. + // These conversions can never be overflow detecting ones. + noway_assert(!tree->gtOverflow()); + tmpType = TYP_INT; + } + // case of src is a floating point type and dst is a small type. + else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType)) + { + NYI_ARM("Lowering for cast from float to small type"); // Not tested yet. + tmpType = TYP_INT; + } + + if (tmpType != TYP_UNDEF) + { + GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType); + tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT)); + + tree->gtFlags &= ~GTF_UNSIGNED; + tree->gtOp.gtOp1 = tmp; + BlockRange().InsertAfter(op1, tmp); + } } void Lowering::LowerRotate(GenTreePtr tree) @@ -62,7 +128,73 @@ bool Lowering::IsCallTargetInRange(void* addr) // return true if the immediate can be folded into an instruction, for example small enough and non-relocatable bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) { - NYI_ARM("ARM IsContainableImmed"); + if (varTypeIsFloating(parentNode->TypeGet())) + { + // TODO-ARM-Cleanup: not tested yet. 
+ NYI_ARM("ARM IsContainableImmed for floating point type"); + + // We can contain a floating point 0.0 constant in a compare instruction + switch (parentNode->OperGet()) + { + default: + return false; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + if (childNode->IsIntegralConst(0)) + return true; + break; + } + } + else + { + // Make sure we have an actual immediate + if (!childNode->IsCnsIntOrI()) + return false; + if (childNode->IsIconHandle() && comp->opts.compReloc) + return false; + + ssize_t immVal = childNode->gtIntCon.gtIconVal; + emitAttr attr = emitActualTypeSize(childNode->TypeGet()); + emitAttr size = EA_SIZE(attr); + + switch (parentNode->OperGet()) + { + default: + return false; + + case GT_ADD: + case GT_SUB: + if (emitter::emitIns_valid_imm_for_add(immVal, INS_FLAGS_DONT_CARE)) + return true; + break; + + case GT_EQ: + case GT_NE: + case GT_LT: + case GT_LE: + case GT_GE: + case GT_GT: + case GT_AND: + case GT_OR: + case GT_XOR: + if (emitter::emitIns_valid_imm_for_alu(immVal)) + return true; + break; + + case GT_STORE_LCL_VAR: + // TODO-ARM-Cleanup: not tested yet + NYI_ARM("ARM IsContainableImmed for GT_STORE_LCL_VAR"); + if (immVal == 0) + return true; + break; + } + } + return false; } diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp index 1720c62acb..cc9e2266d2 100644 --- a/src/jit/lowerarm64.cpp +++ b/src/jit/lowerarm64.cpp @@ -126,6 +126,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfo* info = &(tree->gtLsraInfo); RegisterType registerType = TypeGet(tree); + JITDUMP("TreeNodeInfoInit for: "); + DISPNODE(tree); + JITDUMP("\n"); + switch (tree->OperGet()) { GenTree* op1; @@ -202,6 +206,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) __fallthrough; case GT_LIST: + case GT_FIELD_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: @@ -485,7 +490,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_LE: case GT_GE: case GT_GT: - LowerCmp(tree); + TreeNodeInfoInitCmp(tree); break; case GT_CKFINITE: @@ -524,12 +529,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_BLK: - case GT_OBJ: case GT_DYN_BLK: // These should all be eliminated prior to Lowering. assert(!"Non-store block node in Lowering"); info->srcCount = 0; info->dstCount = 0; + break; case GT_STORE_BLK: case GT_STORE_OBJ: @@ -537,6 +542,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfoInitBlockStore(tree->AsBlk()); break; + case GT_INIT_VAL: + // Always a passthrough of its child's value. 
+ info->srcCount = 0; + info->dstCount = 0; + break; + case GT_LCLHEAP: { info->srcCount = 1; @@ -977,7 +988,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); @@ -989,7 +1000,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // late arg that is not passed in a register assert(argNode->gtOper == GT_PUTARG_STK); - TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry); + TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry); continue; } @@ -1003,16 +1014,16 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) argNode = argNode->gtEffectiveVal(); - // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct - if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST)) + // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct + if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST)) { GenTreePtr actualArgNode = argNode; unsigned originalSize = 0; - if (argNode->gtOper == GT_LIST) + if (argNode->gtOper == GT_FIELD_LIST) { // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs) - GenTreeArgList* argListPtr = argNode->AsArgList(); + GenTreeFieldList* fieldListPtr = argNode->AsFieldList(); // Initailize the first register and the first regmask in our list regNumber targetReg = argReg; @@ -1020,9 +1031,9 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) unsigned iterationNum = 0; originalSize = 0; - for (; argListPtr; argListPtr = argListPtr->Rest()) + for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest()) { - GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + GenTreePtr putArgRegNode = fieldListPtr->Current(); assert(putArgRegNode->gtOper == GT_PUTARG_REG); GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1; @@ -1115,7 +1126,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) assert(curArgTabEntry->regNum == REG_STK); - TreeNodeInfoInitPutArgStk(arg, curArgTabEntry); + TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry); } else { @@ -1154,7 +1165,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // Notes: // Set the child node(s) to be contained when we have a multireg arg // -void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info) +void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info) { assert(argNode->gtOper == GT_PUTARG_STK); @@ -1166,14 +1177,14 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.dstCount = 0; - // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a multireg pass-by-value struct - if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST)) + // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct + if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST)) { // We will use store instructions that each write a register sized value - if (putArgChild->OperGet() == GT_LIST) + if (putArgChild->OperGet() == GT_FIELD_LIST) { - // We consume all of the items in the GT_LIST + // We consume all of the items in the GT_FIELD_LIST argNode->gtLsraInfo.srcCount = info->numSlots; } else @@ -1219,8 +1230,9 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info void 
Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) { - GenTree* dstAddr = blkNode->Addr(); - unsigned size; + GenTree* dstAddr = blkNode->Addr(); + unsigned size = blkNode->gtBlkSize; + GenTree* source = blkNode->Data(); LinearScan* l = m_lsra; Compiler* compiler = comp; @@ -1228,16 +1240,44 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // We may require an additional source or temp register for the size. blkNode->gtLsraInfo.srcCount = 2; blkNode->gtLsraInfo.dstCount = 0; + GenTreePtr srcAddrOrFill = nullptr; + bool isInitBlk = blkNode->OperIsInitBlkOp(); - if ((blkNode->OperGet() == GT_STORE_OBJ) && (blkNode->AsObj()->gtGcPtrCount == 0)) + if (!isInitBlk) { - blkNode->SetOper(GT_STORE_BLK); + // CopyObj or CopyBlk + if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe)) + { + blkNode->SetOper(GT_STORE_BLK); + } + if (source->gtOper == GT_IND) + { + srcAddrOrFill = blkNode->Data()->gtGetOp1(); + // We're effectively setting source as contained, but can't call MakeSrcContained, because the + // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. + // If srcAddr is already non-contained, we don't need to change it. + if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0) + { + srcAddrOrFill->gtLsraInfo.setDstCount(1); + srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); + } + m_lsra->clearOperandCounts(source); + } + else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) + { + assert(source->IsLocal()); + MakeSrcContained(blkNode, source); + } } - if (blkNode->OperIsInitBlkOp()) + if (isInitBlk) { - unsigned size = blkNode->gtBlkSize; - GenTreePtr initVal = blkNode->Data(); + GenTreePtr initVal = source; + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } + srcAddrOrFill = initVal; #if 0 // TODO-ARM64-CQ: Currently we generate a helper call for every @@ -1264,8 +1304,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) initVal->gtType = TYP_LONG; } - MakeSrcContained(tree, blockSize); - // In case we have a buffer >= 16 bytes // we can use SSE2 to do a 128-bit store in a single // instruction. @@ -1282,7 +1320,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) else #endif // 0 { - // The helper follows the regular AMD64 ABI. + // The helper follows the regular ABI. dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; @@ -1306,34 +1344,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) { // CopyObj or CopyBlk // Sources are src and dest and size if not constant. - unsigned size = blkNode->gtBlkSize; - GenTreePtr source = blkNode->Data(); - GenTree* srcAddr = nullptr; - if (source->gtOper == GT_IND) - { - srcAddr = blkNode->Data()->gtGetOp1(); - // We're effectively setting source as contained, but can't call MakeSrcContained, because the - // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading. - // If srcAddr is already non-contained, we don't need to change it. 
- if (srcAddr->gtLsraInfo.getDstCount() == 0) - { - srcAddr->gtLsraInfo.setDstCount(1); - srcAddr->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount); - } - m_lsra->clearOperandCounts(source); - } - else - { - assert(source->IsLocal()); - MakeSrcContained(blkNode, source); - } if (blkNode->OperGet() == GT_STORE_OBJ) { // CopyObj GenTreeObj* objNode = blkNode->AsObj(); - GenTreePtr source = objNode->Data(); unsigned slots = objNode->gtSlots; @@ -1362,16 +1378,19 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) blkNode->gtLsraInfo.internalIntCount = 1; dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); + // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. + // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, + // which is killed by a StoreObj (and thus needn't be reserved). + if (srcAddrOrFill != nullptr) + { + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF); + } } else { // CopyBlk - unsigned size = blkNode->gtBlkSize; - GenTreePtr dstAddr = blkNode->Addr(); - GenTreePtr srcAddr = blkNode->Data(); - short internalIntCount = 0; - regMaskTP internalIntCandidates = RBM_NONE; + short internalIntCount = 0; + regMaskTP internalIntCandidates = RBM_NONE; #if 0 // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size @@ -1379,11 +1398,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented. - if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT) + if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT)) { - assert(!blockSize->IsIconHandle()); - ssize_t size = blockSize->gtIntCon.gtIconVal; - // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of // our framework assemblies, so this is the main code generation scheme we'll use. @@ -1404,9 +1420,9 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // If src or dst are on stack, we don't have to generate the address into a register // because it's just some constant+SP - if (srcAddr->OperIsLocalAddr()) + if (srcAddr != nullptr && srcAddrOrFill->OperIsLocalAddr()) { - MakeSrcContained(blkNode, srcAddr); + MakeSrcContained(blkNode, srcAddrOrFill); } if (dstAddr->OperIsLocalAddr()) @@ -1425,15 +1441,9 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0); // The srcAddr goes in arg1. - if (srcAddr != nullptr) + if (srcAddrOrFill != nullptr) { - srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); - } - else - { - // This is a local; we'll use a temp register for its address. 
- internalIntCandidates |= RBM_ARG_1; - internalIntCount++; + srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1); } if (size != 0) { @@ -1447,7 +1457,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK); blkNode->gtLsraInfo.setSrcCount(3); GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize; - assert(!blockSize->IsIconHandle()); blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2); } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; @@ -1860,7 +1869,7 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) } } -void Lowering::LowerCmp(GenTreePtr tree) +void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) { TreeNodeInfo* info = &(tree->gtLsraInfo); diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 6f98eb6661..589cef482e 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -77,7 +77,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) // InitBlk MakeSrcContained(storeLoc, op1); } - else if (storeLoc->TypeGet() == TYP_SIMD12) + else if ((storeLoc->TypeGet() == TYP_SIMD12) && (storeLoc->OperGet() == GT_STORE_LCL_FLD)) { // Need an additional register to extract upper 4 bytes of Vector3. info->internalFloatCount = 1; @@ -177,6 +177,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_LCL_FLD: + case GT_LCL_VAR: info->srcCount = 0; info->dstCount = 1; @@ -185,9 +186,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) if (tree->TypeGet() == TYP_SIMD12) { // We need an internal register different from targetReg in which 'tree' produces its result - // because both targetReg and internal reg will be in use at the same time. This is achieved - // by asking for two internal registers. - info->internalFloatCount = 2; + // because both targetReg and internal reg will be in use at the same time. + info->internalFloatCount = 1; + info->isInternalRegDelayFree = true; info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs()); } #endif @@ -195,7 +196,16 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_STORE_LCL_FLD: case GT_STORE_LCL_VAR: - info->srcCount = 1; +#ifdef _TARGET_X86_ + if (tree->gtGetOp1()->OperGet() == GT_LONG) + { + info->srcCount = 2; + } + else +#endif // _TARGET_X86_ + { + info->srcCount = 1; + } info->dstCount = 0; LowerStoreLoc(tree->AsLclVarCommon()); break; @@ -242,6 +252,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_LIST: + case GT_FIELD_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: @@ -319,9 +330,87 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_JTRUE: + { + info->srcCount = 0; + info->dstCount = 0; + + GenTree* cmp = tree->gtGetOp1(); + l->clearDstCount(cmp); + +#ifdef FEATURE_SIMD + // Say we have the following IR + // simdCompareResult = GT_SIMD((In)Equality, v1, v2) + // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false) + // GT_JTRUE(integerCompareResult) + // + // In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality + // intrinsic would set or clear Zero flag. + + genTreeOps cmpOper = cmp->OperGet(); + if (cmpOper == GT_EQ || cmpOper == GT_NE) + { + GenTree* cmpOp1 = cmp->gtGetOp1(); + GenTree* cmpOp2 = cmp->gtGetOp2(); + + if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1))) + { + // clear dstCount on SIMD node to indicate that + // result doesn't need to be materialized into a register. 
+ l->clearOperandCounts(cmp); + l->clearDstCount(cmpOp1); + l->clearOperandCounts(cmpOp2); + + // Codegen of SIMD (in)Equality uses target integer reg + // only for setting flags. Target reg is not needed on AVX + // when comparing against Vector Zero. In all other cases + // we need to reserve an int type internal register, since we + // have cleared dstCount. + if (compiler->canUseAVX() && cmpOp1->gtGetOp2()->IsIntegralConstVector(0)) + { + // We don't need an internal register,since we use vptest + // for setting flags. + } + else + { + ++(cmpOp1->gtLsraInfo.internalIntCount); + regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l); + internalCandidates |= l->allRegs(TYP_INT); + cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates); + } + + // We would have to reverse compare oper in the following cases: + // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it. + // Therefore, if compare oper is == or != against false(0), we will + // be checking opposite of what is required. + // + // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it. + // Therefore, if compare oper is == or != against true(1), we will + // be checking opposite of what is required. + GenTreeSIMD* simdNode = cmpOp1->AsSIMD(); + if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) + { + if (cmpOp2->IsIntegralConst(0)) + { + cmp->SetOper(GenTree::ReverseRelop(cmpOper)); + } + } + else + { + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); + if (cmpOp2->IsIntegralConst(1)) + { + cmp->SetOper(GenTree::ReverseRelop(cmpOper)); + } + } + } + } +#endif // FEATURE_SIMD + } + break; + + case GT_JCC: info->srcCount = 0; info->dstCount = 0; - l->clearDstCount(tree->gtOp.gtOp1); break; case GT_JMP: @@ -436,6 +525,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_MUL: case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif SetMulOpCounts(tree); break; @@ -478,6 +570,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->internalFloatCount = 1; info->setInternalCandidates(l, l->internalFloatRegCandidates()); } + else + { + // Codegen of this tree node sets ZF and SF flags. + tree->gtFlags |= GTF_ZSF_SET; + } break; case GT_NOT: @@ -490,6 +587,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_RSZ: case GT_ROL: case GT_ROR: +#ifdef _TARGET_X86_ + case GT_LSH_HI: + case GT_RSH_LO: +#endif TreeNodeInfoInitShiftRotate(tree); break; @@ -499,7 +600,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_LE: case GT_GE: case GT_GT: - LowerCmp(tree); + TreeNodeInfoInitCmp(tree); break; case GT_CKFINITE: @@ -542,10 +643,7 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) } break; -#ifdef _TARGET_X86_ - case GT_OBJ: - NYI_X86("GT_OBJ"); -#elif !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) +#if !defined(FEATURE_PUT_STRUCT_ARG_STK) case GT_OBJ: #endif case GT_BLK: @@ -556,11 +654,11 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->dstCount = 0; break; -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK case GT_PUTARG_STK: - TreeNodeInfoInitPutArgStk(tree); + TreeNodeInfoInitPutArgStk(tree->AsPutArgStk()); break; -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK case GT_STORE_BLK: case GT_STORE_OBJ: @@ -568,6 +666,12 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) TreeNodeInfoInitBlockStore(tree->AsBlk()); break; + case GT_INIT_VAL: + // Always a passthrough of its child's value. 
+ info->srcCount = 0; + info->dstCount = 0; + break; + case GT_LCLHEAP: TreeNodeInfoInitLclHeap(tree); break; @@ -634,14 +738,20 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_ARR_OFFSET: // This consumes the offset, if any, the arrObj and the effective index, // and produces the flattened offset for this dimension. - info->srcCount = 3; - info->dstCount = 1; - info->internalIntCount = 1; + info->srcCount = 3; + info->dstCount = 1; + // we don't want to generate code for this if (tree->gtArrOffs.gtOffset->IsIntegralConst(0)) { MakeSrcContained(tree, tree->gtArrOffs.gtOffset); } + else + { + // Here we simply need an internal register, which must be different + // from any of the operand's registers, but may be the same as targetReg. + info->internalIntCount = 1; + } break; case GT_LEA: @@ -725,15 +835,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) #endif case GT_CLS_VAR: - info->srcCount = 0; - // GT_CLS_VAR, by the time we reach the backend, must always - // be a pure use. - // It will produce a result of the type of the - // node, and use an internal register for the address. - - info->dstCount = 1; - assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0); - info->internalIntCount = 1; + // These nodes are eliminated by rationalizer. + JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet())); + unreached(); break; } // end switch (tree->OperGet()) @@ -813,27 +917,36 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) } } + TreeNodeInfoInitCheckByteable(tree); + + // We need to be sure that we've set info->srcCount and info->dstCount appropriately + assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT)); +} + +//------------------------------------------------------------------------ +// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are +// required, and set the tree node info accordingly. +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// None. +// +void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree) +{ #ifdef _TARGET_X86_ + LinearScan* l = m_lsra; + TreeNodeInfo* info = &(tree->gtLsraInfo); + // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands // if the tree node is a byte type. // - // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr' - // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT - // value. In this case we need to exclude esi/edi from the src candidates of op2. - // - // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool. - // - // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses - // ubyte as the result of comparison and if the result needs to be materialized into a reg - // simply zero extend it to TYP_INT size. Here is an example of generated code: - // cmp dl, byte ptr[addr mode] - // movzx edx, dl - // // Though this looks conservative in theory, in practice we could not think of a case where // the below logic leads to conservative register specification. In future when or if we find // one such case, this logic needs to be fine tuned for that case(s). 
- if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) || - (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2()))) + + if (ExcludeNonByteableRegisters(tree)) { regMaskTP regMask; if (info->dstCount > 0) @@ -870,9 +983,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) } } #endif //_TARGET_X86_ - - // We need to be sure that we've set info->srcCount and info->dstCount appropriately - assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT)); } //------------------------------------------------------------------------ @@ -1028,6 +1138,31 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) GenTreePtr shiftBy = tree->gtOp.gtOp2; GenTreePtr source = tree->gtOp.gtOp1; +#ifdef _TARGET_X86_ + // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that + // we can have a three operand form. Increment the srcCount. + if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO) + { + assert(source->OperGet() == GT_LONG); + + info->srcCount++; + + if (tree->OperGet() == GT_LSH_HI) + { + GenTreePtr sourceLo = source->gtOp.gtOp1; + sourceLo->gtLsraInfo.isDelayFree = true; + } + else + { + GenTreePtr sourceHi = source->gtOp.gtOp2; + sourceHi->gtLsraInfo.isDelayFree = true; + } + + source->gtLsraInfo.hasDelayFreeSrc = true; + info->hasDelayFreeSrc = true; + } +#endif + // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) || @@ -1040,6 +1175,17 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree) else { MakeSrcContained(tree, shiftBy); + + // Note that Rotate Left/Right instructions don't set ZF and SF flags. + // + // If the operand being shifted is 32-bits then upper three bits are masked + // by hardware to get actual shift count. Similarly for 64-bit operands + // shift count is narrowed to [0..63]. If the resulting shift count is zero, + // then shift operation won't modify flags. + // + // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0) + // if the shift count is known to be non-zero and in the range depending on the + // operand size. } } @@ -1088,6 +1234,12 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) assert(ctrlExpr == nullptr); assert(call->gtCallAddr != nullptr); ctrlExpr = call->gtCallAddr; + +#ifdef _TARGET_X86_ + // Fast tail calls aren't currently supported on x86, but if they ever are, the code + // below that handles indirect VSD calls will need to be fixed. + assert(!call->IsFastTailCall() || !call->IsVirtualStub()); +#endif // _TARGET_X86_ } // set reg requirements on call target represented as control sequence. @@ -1103,7 +1255,24 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // computed into a register. if (!call->IsFastTailCall()) { - if (ctrlExpr->isIndir()) +#ifdef _TARGET_X86_ + // On x86, we need to generate a very specific pattern for indirect VSD calls: + // + // 3-byte nop + // call dword ptr [eax] + // + // Where EAX is also used as an argument to the stub dispatch helper. Make + // sure that the call target address is computed into EAX in this case. 
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) + { + assert(ctrlExpr->isIndir()); + + ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET); + MakeSrcContained(call, ctrlExpr); + } + else +#endif // _TARGET_X86_ + if (ctrlExpr->isIndir()) { MakeSrcContained(call, ctrlExpr); } @@ -1191,7 +1360,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // First, count reg args for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); @@ -1206,7 +1375,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) argNode->gtLsraInfo.srcCount = 1; argNode->gtLsraInfo.dstCount = 0; -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK // If the node is TYP_STRUCT and it is put on stack with // putarg_stk operation, we consume and produce no registers. // In this case the embedded Obj node should not produce @@ -1218,7 +1387,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0; argNode->gtLsraInfo.srcCount = 0; } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK continue; } @@ -1248,7 +1417,7 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID. // Use the curArgTabEntry's isStruct to get whether the param is a struct. - if (varTypeIsStruct(argNode) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct)) + if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct)) { unsigned originalSize = 0; LclVarDsc* varDsc = nullptr; @@ -1270,16 +1439,16 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) { originalSize = genTypeSize(argNode->gtType); } - else if (argNode->gtOper == GT_LIST) + else if (argNode->gtOper == GT_FIELD_LIST) { originalSize = 0; // There could be up to 2 PUTARG_REGs in the list - GenTreeArgList* argListPtr = argNode->AsArgList(); - unsigned iterationNum = 0; - for (; argListPtr; argListPtr = argListPtr->Rest()) + GenTreeFieldList* fieldListPtr = argNode->AsFieldList(); + unsigned iterationNum = 0; + for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest()) { - GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1; + GenTreePtr putArgRegNode = fieldListPtr->Current(); assert(putArgRegNode->gtOper == GT_PUTARG_REG); if (iterationNum == 0) @@ -1509,7 +1678,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) } m_lsra->clearOperandCounts(source); } - else if (!source->OperIsSIMD()) + else if (!source->IsMultiRegCall() && !source->OperIsSIMD()) { assert(source->IsLocal()); MakeSrcContained(blkNode, source); @@ -1519,7 +1688,11 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) if (isInitBlk) { GenTree* initVal = source; - srcAddrOrFill = source; + if (initVal->OperIsInitVal()) + { + initVal = initVal->gtGetOp1(); + } + srcAddrOrFill = initVal; // If we have an InitBlk with constant block size we can optimize several ways: // a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes // we use rep stosb since this reduces the register pressure in LSRA and we have @@ -1571,8 +1744,23 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // a pack of 16 init value constants. 
blkNode->gtLsraInfo.internalFloatCount = 1; blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates()); + if ((fill == 0) && ((size & 0xf) == 0)) + { + MakeSrcContained(blkNode, source); + } } blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; + +#ifdef _TARGET_X86_ + if ((size & 1) != 0) + { + // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing + // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this + // when unrolling, so only allow byteable registers as the source value. (We could + // consider just using BlkOpKindRepInstr instead.) + sourceRegMask = RBM_BYTE_REGS; + } +#endif // _TARGET_X86_ } else { @@ -1825,7 +2013,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) } } -#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING +#ifdef FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK. // @@ -1835,44 +2023,219 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // Return Value: // None. // -void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree) +void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk) { - TreeNodeInfo* info = &(tree->gtLsraInfo); + TreeNodeInfo* info = &(putArgStk->gtLsraInfo); LinearScan* l = m_lsra; - if (tree->TypeGet() != TYP_STRUCT) +#ifdef _TARGET_X86_ + if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST) + { + putArgStk->gtNumberReferenceSlots = 0; + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Invalid; + + GenTreeFieldList* fieldList = putArgStk->gtOp1->AsFieldList(); + + // The code generator will push these fields in reverse order by offset. Reorder the list here s.t. the order + // of uses is visible to LSRA. + unsigned fieldCount = 0; + GenTreeFieldList* head = nullptr; + for (GenTreeFieldList *current = fieldList, *next; current != nullptr; current = next) + { + next = current->Rest(); + + // First, insert the field node into the sorted list. + GenTreeFieldList* prev = nullptr; + for (GenTreeFieldList* cursor = head;; cursor = cursor->Rest()) + { + // If the offset of the current list node is greater than the offset of the cursor or if we have + // reached the end of the list, insert the current node before the cursor and terminate. + if ((cursor == nullptr) || (current->gtFieldOffset > cursor->gtFieldOffset)) + { + if (prev == nullptr) + { + assert(cursor == head); + head = current; + } + else + { + prev->Rest() = current; + } + + current->Rest() = cursor; + break; + } + } + + fieldCount++; + } + + info->srcCount = fieldCount; + info->dstCount = 0; + + // In theory, the upper bound for the size of a field list is 8: these constructs only appear when passing the + // collection of lclVars that represent the fields of a promoted struct lclVar, and we do not promote struct + // lclVars with more than 4 fields. If each of these lclVars is of type long, decomposition will split the + // corresponding field list nodes in two, giving an upper bound of 8. + // + // The reason that this is important is that the algorithm we use above to sort the field list is O(N^2): if + // the maximum size of a field list grows significantly, we will need to reevaluate it. + assert(fieldCount <= 8); + + // The sort above may have changed which node is at the head of the list. Update the PUTARG_STK node if + // necessary. 
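The x86 init-block hunk above restricts the fill value to a byteable register when the unrolled size is odd, because the copy ends in a single-byte store. A minimal sketch of that store pattern, assuming a 5-byte fill; the function is illustrative only:

#include <cstring>

void Fill5(unsigned char* dst, unsigned char val)
{
    // Replicate the byte into a 4-byte pattern and store it as one word.
    unsigned int word = val * 0x01010101u;
    std::memcpy(dst, &word, sizeof(word));

    // The trailing single-byte store is the problem case: in 32-bit mode only
    // EAX/EBX/ECX/EDX have byte-addressable low halves, so the value being
    // stored must live in one of them.
    dst[4] = val;
}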
+ if (head != fieldList) + { + head->gtFlags |= GTF_FIELD_LIST_HEAD; + fieldList->gtFlags &= ~GTF_FIELD_LIST_HEAD; + +#ifdef DEBUG + head->gtSeqNum = fieldList->gtSeqNum; +#endif // DEBUG + + head->gtLsraInfo = fieldList->gtLsraInfo; + head->gtClearReg(comp); + + BlockRange().InsertAfter(fieldList, head); + BlockRange().Remove(fieldList); + + fieldList = head; + putArgStk->gtOp1 = fieldList; + } + + // Now that the fields have been sorted, initialize the LSRA info. + bool allFieldsAreSlots = true; + bool needsByteTemp = false; + unsigned prevOffset = putArgStk->getArgSize(); + for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest()) + { + GenTree* const fieldNode = current->Current(); + const var_types fieldType = fieldNode->TypeGet(); + const unsigned fieldOffset = current->gtFieldOffset; + assert(fieldType != TYP_LONG); + + // For x86 we must mark all integral fields as contained or reg-optional, and handle them + // accordingly in code generation, since we may have up to 8 fields, which cannot all be in + // registers to be consumed atomically by the call. + if (varTypeIsIntegralOrI(fieldNode)) + { + if (fieldNode->OperGet() == GT_LCL_VAR) + { + LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]); + if (varDsc->lvTracked && !varDsc->lvDoNotEnregister) + { + SetRegOptional(fieldNode); + } + else + { + MakeSrcContained(putArgStk, fieldNode); + } + } + else if (fieldNode->IsIntCnsFitsInI32()) + { + MakeSrcContained(putArgStk, fieldNode); + } + else + { + // For the case where we cannot directly push the value, if we run out of registers, + // it would be better to defer computation until we are pushing the arguments rather + // than spilling, but this situation is not all that common, as most cases of promoted + // structs do not have a large number of fields, and of those most are lclVars or + // copy-propagated constants. + SetRegOptional(fieldNode); + } + } + else + { + assert(varTypeIsFloating(fieldNode)); + } + + // We can treat as a slot any field that is stored at a slot boundary, where the previous + // field is not in the same slot. (Note that we store the fields in reverse order.) + const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4); + if (!fieldIsSlot) + { + allFieldsAreSlots = false; + if (varTypeIsByte(fieldType)) + { + // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes + // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will + // need a byte-addressable register for the store. We will enforce this requirement on an internal + // register, which we can use to copy multiple byte values. + needsByteTemp = true; + } + } + + if (varTypeIsGC(fieldType)) + { + putArgStk->gtNumberReferenceSlots++; + } + + prevOffset = fieldOffset; + } + + // Set the copy kind. + // TODO-X86-CQ: Even if we are using push, if there are contiguous floating point fields, we should + // adjust the stack once for those fields. The latter is really best done in code generation, but + // this tuning should probably be undertaken as a whole. + // Also, if there are floating point fields, it may be better to use the "Unroll" mode + // of copying the struct as a whole, if the fields are not register candidates. 
+ if (allFieldsAreSlots) + { + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::PushAllSlots; + } + else + { + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push; + // If any of the fields cannot be stored with an actual push, we may need a temporary + // register to load the value before storing it to the stack location. + info->internalIntCount = 1; + regMaskTP regMask = l->allRegs(TYP_INT); + if (needsByteTemp) + { + regMask &= ~RBM_NON_BYTE_REGS; + } + info->setInternalCandidates(l, regMask); + } + return; + } +#endif // _TARGET_X86_ + +#if defined(FEATURE_SIMD) && defined(_TARGET_X86_) + // For PutArgStk of a TYP_SIMD12, we need an extra register. + if (putArgStk->TypeGet() == TYP_SIMD12) { - TreeNodeInfoInitSimple(tree); + info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount; + info->dstCount = 0; + info->internalFloatCount = 1; + info->setInternalCandidates(l, l->allSIMDRegs()); return; } +#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_) - GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk(); + if (putArgStk->TypeGet() != TYP_STRUCT) + { + TreeNodeInfoInitSimple(putArgStk); + return; + } - GenTreePtr dst = tree; - GenTreePtr src = tree->gtOp.gtOp1; + GenTreePtr dst = putArgStk; + GenTreePtr src = putArgStk->gtOp1; GenTreePtr srcAddr = nullptr; + bool haveLocalAddr = false; if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND)) { srcAddr = src->gtOp.gtOp1; + assert(srcAddr != nullptr); + haveLocalAddr = srcAddr->OperIsLocalAddr(); } else { - assert(varTypeIsSIMD(tree)); - } - info->srcCount = src->gtLsraInfo.dstCount; - - // If this is a stack variable address, - // make the op1 contained, so this way - // there is no unnecessary copying between registers. - // To avoid assertion, increment the parent's source. - // It is recovered below. - bool haveLocalAddr = ((srcAddr != nullptr) && (srcAddr->OperIsLocalAddr())); - if (haveLocalAddr) - { - info->srcCount += 1; + assert(varTypeIsSIMD(putArgStk)); } + info->srcCount = src->gtLsraInfo.dstCount; info->dstCount = 0; // In case of a CpBlk we could use a helper call. In case of putarg_stk we @@ -1884,7 +2247,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree) // This threshold will decide from using the helper or let the JIT decide to inline // a code sequence of its choice. ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT); - ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE; + ssize_t size = putArgStk->gtNumSlots * TARGET_POINTER_SIZE; // TODO-X86-CQ: The helper call either is not supported on x86 or required more work // (I don't know which). @@ -1892,7 +2255,7 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree) // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2. // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of // our framework assemblies, so this is the main code generation scheme we'll use. - if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0) + if (size <= CPBLK_UNROLL_LIMIT && putArgStk->gtNumberReferenceSlots == 0) { // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. 
// @@ -1913,46 +2276,62 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree) info->setInternalCandidates(l, regMask); } +#ifdef _TARGET_X86_ + if (size >= 8) +#else // !_TARGET_X86_ if (size >= XMM_REGSIZE_BYTES) +#endif // !_TARGET_X86_ { - // If we have a buffer larger than XMM_REGSIZE_BYTES, - // reserve an XMM register to use it for a + // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux, + // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a // series of 16-byte loads and stores. info->internalFloatCount = 1; info->addInternalCandidates(l, l->internalFloatRegCandidates()); } - if (haveLocalAddr) +#ifdef _TARGET_X86_ + if (size < XMM_REGSIZE_BYTES) { - MakeSrcContained(putArgStkTree, srcAddr); + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push; } - - // If src or dst are on stack, we don't have to generate the address into a register - // because it's just some constant+SP - putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll; + else +#endif // _TARGET_X86_ + { + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Unroll; + } + } +#ifdef _TARGET_X86_ + else if (putArgStk->gtNumberReferenceSlots != 0) + { + // On x86, we must use `push` to store GC references to the stack in order for the emitter to properly update + // the function's GC info. These `putargstk` nodes will generate a sequence of `push` instructions. + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push; } +#endif // _TARGET_X86_ else { info->internalIntCount += 3; info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI)); - if (haveLocalAddr) - { - MakeSrcContained(putArgStkTree, srcAddr); - } - putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr; + putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr; } // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree. - MakeSrcContained(putArgStkTree, src); + MakeSrcContained(putArgStk, src); - // Balance up the inc above. if (haveLocalAddr) { - info->srcCount -= 1; + // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary + // copies. + // + // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it + // afterwards. + info->srcCount++; + MakeSrcContained(putArgStk, srcAddr); + info->srcCount--; } } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING +#endif // FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ // TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP. @@ -1976,13 +2355,17 @@ void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree) // Here '-' means don't care. // // Size? Init Memory? # temp regs - // 0 - 0 - // const and <=6 reg words - 0 - // const and >6 reg words Yes 0 + // 0 - 0 (returns 0) + // const and <=6 reg words - 0 (pushes '0') + // const and >6 reg words Yes 0 (pushes '0') // const and <PageSize No 0 (amd64) 1 (x86) - // const and >=PageSize No 2 - // Non-const Yes 0 - // Non-const No 2 + // (x86:tmpReg for sutracting from esp) + // const and >=PageSize No 2 (regCnt and tmpReg for subtracing from sp) + // Non-const Yes 0 (regCnt=targetReg and pushes '0') + // Non-const No 2 (regCnt and tmpReg for subtracting from sp) + // + // Note: Here we don't need internal register to be different from targetReg. + // Rather, require it to be different from operand's reg. 
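A compact restatement of the temp-register table above as it applies to x86. This is an illustrative summary rather than the JIT's code, and the names are made up:

// Number of internal temp registers a GT_LCLHEAP needs on x86, per the table.
unsigned LclHeapTempRegCount(bool sizeIsConst, size_t size, bool initMem, size_t pageSize, size_t regSize)
{
    if (sizeIsConst && (size == 0))           return 0; // just returns 0
    if (sizeIsConst && (size <= 6 * regSize)) return 0; // emits pushes of '0'
    if (initMem)                              return 0; // pushes '0'; regCnt can reuse targetReg
    if (sizeIsConst && (size < pageSize))     return 1; // one temp for subtracting from ESP
    return 2;                                 // regCnt plus a temp for subtracting from SP / probing
}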
GenTreePtr size = tree->gtOp.gtOp1; if (size->IsCnsIntOrI()) @@ -2121,6 +2504,9 @@ void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree) // as reg optional. SetRegOptionalForBinOp(tree); } + + // Codegen of this tree node sets ZF and SF flags. + tree->gtFlags |= GTF_ZSF_SET; } //------------------------------------------------------------------------ @@ -2189,15 +2575,40 @@ void Lowering::TreeNodeInfoInitModDiv(GenTree* tree) info->setDstCandidates(l, RBM_RAX); } - // If possible would like to have op1 in RAX to avoid a register move - op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX); + bool op2CanBeRegOptional = true; +#ifdef _TARGET_X86_ + if (op1->OperGet() == GT_LONG) + { + // To avoid reg move would like to have op1's low part in RAX and high part in RDX. + GenTree* loVal = op1->gtGetOp1(); + GenTree* hiVal = op1->gtGetOp2(); + + // Src count is actually 3, so increment. + assert(op2->IsCnsIntOrI()); + assert(tree->OperGet() == GT_UMOD); + info->srcCount++; + op2CanBeRegOptional = false; + + // This situation also requires an internal register. + info->internalIntCount = 1; + info->setInternalCandidates(l, l->allRegs(TYP_INT)); + + loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX); + hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX); + } + else +#endif + { + // If possible would like to have op1 in RAX to avoid a register move + op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX); + } // divisor can be an r/m, but the memory indirection must be of the same size as the divide if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet())) { MakeSrcContained(tree, op2); } - else + else if (op2CanBeRegOptional) { op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); @@ -2298,12 +2709,13 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) info->dstCount = 1; switch (simdTree->gtSIMDIntrinsicID) { + GenTree* op1; GenTree* op2; case SIMDIntrinsicInit: { info->srcCount = 1; - GenTree* op1 = tree->gtOp.gtOp1; + op1 = tree->gtOp.gtOp1; // This sets all fields of a SIMD struct to the given value. // Mark op1 as contained if it is either zero or int constant of all 1's, @@ -2377,7 +2789,8 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) info->srcCount = 2; // SSE2 32-bit integer multiplication requires two temp regs - if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT) + if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT && + comp->getSIMDInstructionSet() == InstructionSet_SSE2) { info->internalFloatCount = 2; info->setInternalCandidates(lsra, lsra->allSIMDRegs()); @@ -2406,38 +2819,78 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: - // Need two SIMD registers as scratch. - // See genSIMDIntrinsicRelOp() for details on code sequence generate and - // the need for two scratch registers. - info->srcCount = 2; - info->internalFloatCount = 2; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); + info->srcCount = 2; + + // On SSE4/AVX, we can generate optimal code for (in)equality + // against zero using ptest. We can safely do the this optimization + // for integral vectors but not for floating-point for the reason + // that we have +0.0 and -0.0 and +0.0 == -0.0 + op2 = tree->gtGetOp2(); + if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0)) + { + MakeSrcContained(tree, op2); + } + else + { + + // Need one SIMD register as scratch. 
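The ptest shortcut above is restricted to integral vectors because ptest performs a bitwise comparison. A minimal illustration of why that is unsafe for floating point, assuming IEEE-754 doubles:

#include <cstdint>
#include <cstring>

// +0.0 and -0.0 compare equal as floating-point values, but their bit patterns
// differ, so a bitwise (ptest-style) comparison would disagree with ==.
bool BitwiseEqual(double a, double b)
{
    uint64_t ba, bb;
    std::memcpy(&ba, &a, sizeof(ba));
    std::memcpy(&bb, &b, sizeof(bb));
    return ba == bb;
}
// (0.0 == -0.0) is true, while BitwiseEqual(0.0, -0.0) is false.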
+ // See genSIMDIntrinsicRelOp() for details on code sequence generated and + // the need for one scratch register. + // + // Note these intrinsics produce a BOOL result, hence internal float + // registers reserved are guaranteed to be different from target + // integer register without explicitly specifying. + info->internalFloatCount = 1; + info->setInternalCandidates(lsra, lsra->allSIMDRegs()); + } break; case SIMDIntrinsicDotProduct: - if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) || - (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32)) + // Float/Double vectors: + // For SSE, or AVX with 32-byte vectors, we also need an internal register + // as scratch. Further we need the targetReg and internal reg to be distinct + // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we + // don't need a tmpReg. + // + // 32-byte integer vector on SSE4/AVX: + // will take advantage of phaddd, which operates only on 128-bit xmm reg. + // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal + // registers since targetReg is an int type register. + // + // See genSIMDIntrinsicDotProduct() for details on code sequence generated + // and the need for scratch registers. + if (varTypeIsFloating(simdTree->gtSIMDBaseType)) { - // For SSE, or AVX with 32-byte vectors, we also need an internal register as scratch. - // Further we need the targetReg and internal reg to be distinct registers. - // This is achieved by requesting two internal registers; thus one of them - // will be different from targetReg. - // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. - // - // See genSIMDIntrinsicDotProduct() for details on code sequence generated and - // the need for scratch registers. - info->internalFloatCount = 2; + if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) || + (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32)) + { + info->internalFloatCount = 1; + info->isInternalRegDelayFree = true; + info->setInternalCandidates(lsra, lsra->allSIMDRegs()); + } + // else don't need scratch reg(s). + } + else + { + assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4); + + // No need to set isInternalRegDelayFree since targetReg is a + // an int type reg and guaranteed to be different from xmm/ymm + // regs. + info->internalFloatCount = comp->canUseAVX() ? 2 : 1; info->setInternalCandidates(lsra, lsra->allSIMDRegs()); } info->srcCount = 2; break; case SIMDIntrinsicGetItem: + { // This implements get_Item method. The sources are: // - the source SIMD struct // - index (which element to get) // The result is baseType of SIMD struct. info->srcCount = 2; + op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; // If the index is a constant, mark it as contained. @@ -2446,48 +2899,69 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree) info->srcCount = 1; } - // If the index is not a constant, we will use the SIMD temp location to store the vector. - // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we - // can use that in the process of extracting the element. - // - // If the index is a constant and base type is a small int we can use pextrw, but on AVX - // we will need a temp if are indexing into the upper half of the AVX register. - // In all other cases with constant index, we need a temp xmm register to extract the - // element if index is other than zero. 
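The scratch-register reservations for SIMDIntrinsicDotProduct above exist because a dot product ends in a horizontal reduction across lanes. The scalar equivalent of the computation, shown for a 4-element float vector (illustrative only):

float DotProduct4(const float a[4], const float b[4])
{
    // Element-wise multiply followed by a horizontal add across all lanes;
    // the lane-folding steps are what consume the extra temporary registers.
    float sum = 0.0f;
    for (int i = 0; i < 4; i++)
    {
        sum += a[i] * b[i];
    }
    return sum;
}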
- - if (!op2->IsCnsIntOrI()) + if (op1->isMemoryOp()) { - (void)comp->getSIMDInitTempVarNum(); + MakeSrcContained(tree, op1); + + // Although GT_IND of TYP_SIMD12 reserves an internal float + // register for reading 4 and 8 bytes from memory and + // assembling them into target XMM reg, it is not required + // in this case. + op1->gtLsraInfo.internalIntCount = 0; + op1->gtLsraInfo.internalFloatCount = 0; } - else if (!varTypeIsFloating(simdTree->gtSIMDBaseType)) + else { - bool needFloatTemp; - if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) && - (comp->getSIMDInstructionSet() == InstructionSet_AVX)) - { - int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType); - needFloatTemp = (byteShiftCnt >= 16); - } - else + // If the index is not a constant, we will use the SIMD temp location to store the vector. + // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we + // can use that in the process of extracting the element. + // + // If the index is a constant and base type is a small int we can use pextrw, but on AVX + // we will need a temp if are indexing into the upper half of the AVX register. + // In all other cases with constant index, we need a temp xmm register to extract the + // element if index is other than zero. + + if (!op2->IsCnsIntOrI()) { - needFloatTemp = !op2->IsIntegralConst(0); + (void)comp->getSIMDInitTempVarNum(); } - if (needFloatTemp) + else if (!varTypeIsFloating(simdTree->gtSIMDBaseType)) { - info->internalFloatCount = 1; - info->setInternalCandidates(lsra, lsra->allSIMDRegs()); + bool needFloatTemp; + if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) && + (comp->getSIMDInstructionSet() == InstructionSet_AVX)) + { + int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType); + needFloatTemp = (byteShiftCnt >= 16); + } + else + { + needFloatTemp = !op2->IsIntegralConst(0); + } + + if (needFloatTemp) + { + info->internalFloatCount = 1; + info->setInternalCandidates(lsra, lsra->allSIMDRegs()); + } } } - break; + } + break; case SIMDIntrinsicSetX: case SIMDIntrinsicSetY: case SIMDIntrinsicSetZ: case SIMDIntrinsicSetW: - // We need an internal integer register - info->srcCount = 2; - info->internalIntCount = 1; - info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); + info->srcCount = 2; + + // We need an internal integer register for SSE2 codegen + if (comp->getSIMDInstructionSet() == InstructionSet_SSE2) + { + info->internalIntCount = 1; + info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); + } + break; case SIMDIntrinsicCast: @@ -2592,6 +3066,8 @@ void Lowering::TreeNodeInfoInitCast(GenTree* tree) { if (genTypeSize(castOpType) == 8) { + // Here we don't need internal register to be different from targetReg, + // rather require it to be different from operand's reg. info->internalIntCount = 1; } } @@ -2693,7 +3169,6 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) GenTreePtr index = nullptr; unsigned mul, cns; bool rev; - bool modifiedSources = false; #ifdef FEATURE_SIMD // If indirTree is of TYP_SIMD12, don't mark addr as contained @@ -2711,11 +3186,10 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) info->internalFloatCount = 1; // In case of GT_IND we need an internal register different from targetReg and - // both of the registers are used at the same time. This achieved by reserving - // two internal registers + // both of the registers are used at the same time. 
if (indirTree->OperGet() == GT_IND) { - (info->internalFloatCount)++; + info->isInternalRegDelayFree = true; } info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs()); @@ -2724,16 +3198,21 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) } #endif // FEATURE_SIMD - // These nodes go into an addr mode: - // - GT_CLS_VAR_ADDR turns into a constant. - // - GT_LCL_VAR_ADDR is a stack addr mode. - if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR)) + if ((indirTree->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0) { + // The address of an indirection that requires its address in a reg. + // Skip any further processing that might otherwise make it contained. + } + else if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR)) + { + // These nodes go into an addr mode: + // - GT_CLS_VAR_ADDR turns into a constant. + // - GT_LCL_VAR_ADDR is a stack addr mode. + // make this contained, it turns into a constant that goes into an addr mode MakeSrcContained(indirTree, addr); } - else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp) && - addr->gtLsraInfo.getDstCandidates(m_lsra) != RBM_VIRTUAL_STUB_PARAM) + else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp)) { // Amd64: // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address. @@ -2755,17 +3234,10 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) } else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr)) { - GenTreeAddrMode* lea = addr->AsAddrMode(); - base = lea->Base(); - index = lea->Index(); - - m_lsra->clearOperandCounts(addr); - // The srcCount is decremented because addr is now "contained", - // then we account for the base and index below, if they are non-null. - info->srcCount--; + MakeSrcContained(indirTree, addr); } else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) && - !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index))) + !AreSourcesPossiblyModifiedLocals(indirTree, base, index)) { // An addressing mode will be constructed that may cause some // nodes to not need a register, and cause others' lifetimes to be extended @@ -2774,7 +3246,16 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) assert(base != addr); m_lsra->clearOperandCounts(addr); - GenTreePtr arrLength = nullptr; + const bool hasBase = base != nullptr; + const bool hasIndex = index != nullptr; + assert(hasBase || hasIndex); // At least one of a base or an index must be present. + + // If the addressing mode has both a base and an index, bump its source count by one. If it only has one or the + // other, its source count is already correct (due to the source for the address itself). + if (hasBase && hasIndex) + { + info->srcCount++; + } // Traverse the computation below GT_IND to find the operands // for the addressing mode, marking the various constants and @@ -2784,14 +3265,13 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) // up of simple arithmetic operators, and the code generator // only traverses one leg of each node. 
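The source-count adjustment above reflects that only the base and the index of an addressing mode occupy registers; the scale and the constant displacement are folded into the instruction encoding. A small illustration (the assembly comment shows one plausible x86-64 encoding, not output taken from the JIT):

int LoadElement(int* base, int index)
{
    // One load, e.g.:  mov eax, dword ptr [rdi + 4*rsi + 24]
    // base and index each consume a register; the *4 scale and the +24
    // displacement are encoded in the instruction itself.
    return base[index + 6];
}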
- bool foundBase = (base == nullptr); - bool foundIndex = (index == nullptr); - GenTreePtr nextChild = nullptr; - for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild) + bool foundBase = !hasBase; + bool foundIndex = !hasIndex; + for (GenTree *child = addr, *nextChild = nullptr; child != nullptr && !child->OperIsLeaf(); child = nextChild) { - nextChild = nullptr; - GenTreePtr op1 = child->gtOp.gtOp1; - GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; + nextChild = nullptr; + GenTree* op1 = child->gtOp.gtOp1; + GenTree* op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr; if (op1 == base) { @@ -2832,7 +3312,6 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) } } assert(foundBase && foundIndex); - info->srcCount--; // it gets incremented below. } else if (addr->gtOper == GT_ARR_ELEM) { @@ -2845,32 +3324,23 @@ void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree) assert(addr->gtLsraInfo.srcCount >= 2); addr->gtLsraInfo.srcCount -= 1; } - else - { - // it is nothing but a plain indir - info->srcCount--; // base gets added in below - base = addr; - } - - if (base != nullptr) - { - info->srcCount++; - } - - if (index != nullptr && !modifiedSources) - { - info->srcCount++; - } } -void Lowering::LowerCmp(GenTreePtr tree) +void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree) { + assert(tree->OperIsCompare()); + TreeNodeInfo* info = &(tree->gtLsraInfo); info->srcCount = 2; info->dstCount = 1; #ifdef _TARGET_X86_ + // If the compare is used by a jump, we just need to set the condition codes. If not, then we need + // to store the result into the low byte of a register, which requires the dst be a byteable register. + // We always set the dst candidates, though, because if this is compare is consumed by a jump, they + // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear + // that flag is maintained until this location (especially for decomposed long compares). info->setDstCandidates(m_lsra, RBM_BYTE_REGS); #endif // _TARGET_X86_ @@ -2894,9 +3364,9 @@ void Lowering::LowerCmp(GenTreePtr tree) #endif // !defined(_TARGET_64BIT_) // If either of op1 or op2 is floating point values, then we need to use - // ucomiss or ucomisd to compare, both of which support the following form - // ucomis[s|d] xmm, xmm/mem. That is only the second operand can be a memory - // op. + // ucomiss or ucomisd to compare, both of which support the following form: + // ucomis[s|d] xmm, xmm/mem + // That is only the second operand can be a memory op. // // Second operand is a memory Op: Note that depending on comparison operator, // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or @@ -2952,16 +3422,9 @@ void Lowering::LowerCmp(GenTreePtr tree) bool hasShortCast = false; if (CheckImmedAndMakeContained(tree, op2)) { - bool op1CanBeContained = (op1Type == op2Type); - if (!op1CanBeContained) - { - if (genTypeSize(op1Type) == genTypeSize(op2Type)) - { - // The constant is of the correct size, but we don't have an exact type match - // We can treat the isMemoryOp as "contained" - op1CanBeContained = true; - } - } + // If the types are the same, or if the constant is of the correct size, + // we can treat the isMemoryOp as contained. 
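The x86 note above about byteable destination registers comes from how a materialized relop result is produced: a byte-sized setcc followed by a zero-extend. An illustrative example; the assembly comment is one plausible sequence, not JIT output:

bool IsNegative(int x)
{
    // e.g.:  cmp ecx, 0
    //        setl al          ; setcc writes a byte, so AL/BL/CL/DL is required
    //        movzx eax, al
    return x < 0;
}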
+ bool op1CanBeContained = (genTypeSize(op1Type) == genTypeSize(op2Type)); // Do we have a short compare against a constant in op2 // @@ -3031,13 +3494,13 @@ void Lowering::LowerCmp(GenTreePtr tree) bool op1IsMadeContained = false; // When op1 is a GT_AND we can often generate a single "test" instruction - // instead of two instructions (an "and" instruction followed by a "cmp"/"test") + // instead of two instructions (an "and" instruction followed by a "cmp"/"test"). // - // This instruction can only be used for equality or inequality comparions. + // This instruction can only be used for equality or inequality comparisons. // and we must have a compare against zero. // // If we have a postive test for a single bit we can reverse the condition and - // make the compare be against zero + // make the compare be against zero. // // Example: // GT_EQ GT_NE @@ -3046,8 +3509,8 @@ void Lowering::LowerCmp(GenTreePtr tree) // / \ / \ // andOp1 GT_CNS (0x100) andOp1 GT_CNS (0x100) // - // We will mark the GT_AND node as contained if the tree is a equality compare with zero - // Additionally when we do this we also allow for a contained memory operand for "andOp1". + // We will mark the GT_AND node as contained if the tree is an equality compare with zero. + // Additionally, when we do this we also allow for a contained memory operand for "andOp1". // bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE); @@ -3066,7 +3529,7 @@ void Lowering::LowerCmp(GenTreePtr tree) // so that we can generate a test instruction. // Reverse the equality comparison - tree->gtOper = (tree->gtOper == GT_EQ) ? GT_NE : GT_EQ; + tree->SetOperRaw((tree->gtOper == GT_EQ) ? GT_NE : GT_EQ); // Change the relOp2CnsVal to zero relOp2CnsVal = 0; @@ -3171,7 +3634,7 @@ void Lowering::LowerCmp(GenTreePtr tree) genTreeOps castOp1Oper = castOp1->OperGet(); bool safeOper = false; - // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE + // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE. // For example when 'castOp1Oper' is a GT_RSZ or GT_RSH then we are shifting // bits from the left into the lower bits. If we change the type to a TYP_UBYTE // we will instead generate a byte sized shift operation: shr al, 24 @@ -3196,22 +3659,24 @@ void Lowering::LowerCmp(GenTreePtr tree) // assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation - GenTreePtr removeTreeNode = op1; - tree->gtOp.gtOp1 = castOp1; - op1 = castOp1; - castOp1->gtType = TYP_UBYTE; - - // trim down the value if castOp1 is an int constant since its type changed to UBYTE. - if (castOp1Oper == GT_CNS_INT) - { - castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal; - } - + // TODO-Cleanup: we're within "if (CheckImmedAndMakeContained(tree, op2))", so isn't + // the following condition always true? if (op2->isContainedIntOrIImmed()) { ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue(); if (val >= 0 && val <= 255) { + GenTreePtr removeTreeNode = op1; + tree->gtOp.gtOp1 = castOp1; + op1 = castOp1; + castOp1->gtType = TYP_UBYTE; + + // trim down the value if castOp1 is an int constant since its type changed to UBYTE. 
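The GT_EQ/GT_NE-over-GT_AND diagram above describes reversing a single-mask equality so that the compare becomes a compare against zero and can be emitted as a single test instruction. In source terms (illustrative):

bool HasFlag(unsigned flags)
{
    // Original form:          return (flags & 0x100) == 0x100;
    // The reversed form below is equivalent for a single-bit mask and lets
    // codegen emit:  test reg, 0x100 / setne (or jne), with no separate cmp.
    return (flags & 0x100) != 0;
}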
+ if (castOp1Oper == GT_CNS_INT) + { + castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal; + } + op2->gtType = TYP_UBYTE; tree->gtFlags |= GTF_UNSIGNED; @@ -3222,18 +3687,26 @@ void Lowering::LowerCmp(GenTreePtr tree) MakeSrcContained(tree, op1); op1IsMadeContained = true; } - } - } - BlockRange().Remove(removeTreeNode); + BlockRange().Remove(removeTreeNode); + + // We've changed the type on op1 to TYP_UBYTE, but we already processed that node. + // We need to go back and mark it byteable. + // TODO-Cleanup: it might be better to move this out of the TreeNodeInfoInit pass to + // the earlier "lower" pass, in which case the byteable check would just fall out. + // But that is quite complex! + TreeNodeInfoInitCheckByteable(op1); + #ifdef DEBUG - if (comp->verbose) - { - printf("LowerCmp: Removing a GT_CAST to TYP_UBYTE and changing castOp1->gtType to " - "TYP_UBYTE\n"); - comp->gtDispTreeRange(BlockRange(), tree); - } + if (comp->verbose) + { + printf("TreeNodeInfoInitCmp: Removing a GT_CAST to TYP_UBYTE and changing " + "castOp1->gtType to TYP_UBYTE\n"); + comp->gtDispTreeRange(BlockRange(), tree); + } #endif + } + } } } @@ -3241,6 +3714,41 @@ void Lowering::LowerCmp(GenTreePtr tree) if (!op1IsMadeContained) { SetRegOptional(op1); + + // If op1 codegen sets ZF and SF flags and ==/!= against + // zero, we don't need to generate test instruction, + // provided we don't have another GenTree node between op1 + // and tree that could potentially modify flags. + // + // TODO-CQ: right now the below peep is inexpensive and + // gets the benefit in most of cases because in majority + // of cases op1, op2 and tree would be in that order in + // execution. In general we should be able to check that all + // the nodes that come after op1 in execution order do not + // modify the flags so that it is safe to avoid generating a + // test instruction. Such a check requires that on each + // GenTree node we need to set the info whether its codegen + // will modify flags. + // + // TODO-CQ: We can optimize compare against zero in the + // following cases by generating the branch as indicated + // against each case. + // 1) unsigned compare + // < 0 - always FALSE + // <= 0 - ZF=1 and jne + // > 0 - ZF=0 and je + // >= 0 - always TRUE + // + // 2) signed compare + // < 0 - SF=1 and js + // >= 0 - SF=0 and jns + if (isEqualityCompare && op1->gtSetZSFlags() && op2->IsIntegralConst(0) && (op1->gtNext == op2) && + (op2->gtNext == tree)) + { + // Require codegen of op1 to set the flags. + assert(!op1->gtSetFlags()); + op1->gtFlags |= GTF_SET_FLAGS; + } } } } @@ -3255,10 +3763,17 @@ void Lowering::LowerCmp(GenTreePtr tree) { MakeSrcContained(tree, op1); } + else if (op1->IsCnsIntOrI()) + { + // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm, + // but there is currently an assert in CodeGen::genCompareInt(). + // https://github.com/dotnet/coreclr/issues/7270 + SetRegOptional(op2); + } else { // One of op1 or op2 could be marked as reg optional - // to indicate that codgen can still generate code + // to indicate that codegen can still generate code // if one of them is on stack. 
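The flag-reuse peephole described above (GTF_ZSF_SET on op1 plus an adjacent ==/!= against zero) targets patterns like the following, where the AND already sets ZF and no separate test instruction is needed. The assembly comments show one plausible sequence, not JIT output:

bool AnyLowBitsSet(unsigned value)
{
    unsigned masked = value & 0xFF; // and reg, 0xFF   ; sets ZF/SF
    return masked != 0;             // setne / jne     ; reuses ZF, no extra test
}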
SetRegOptional(PreferredRegOptionalOperand(tree)); } @@ -3318,7 +3833,6 @@ void Lowering::LowerCast(GenTree* tree) var_types dstType = tree->CastToType(); var_types srcType = op1->TypeGet(); var_types tmpType = TYP_UNDEF; - bool srcUns = false; // force the srcType to unsigned if GT_UNSIGNED flag is set if (tree->gtFlags & GTF_UNSIGNED) @@ -3849,6 +4363,20 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd) } m_lsra->clearOperandCounts(indirCandidateChild); +#ifdef _TARGET_X86_ + if (varTypeIsByte(storeInd)) + { + // If storeInd is of TYP_BYTE, set indirOpSources to byteable registers. + bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0; + if (!containedNode) + { + regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra); + assert(regMask != RBM_NONE); + indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS); + } + } +#endif + return true; } @@ -3858,8 +4386,11 @@ bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd) */ void Lowering::SetMulOpCounts(GenTreePtr tree) { +#if defined(_TARGET_X86_) + assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG); +#else assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI); - +#endif TreeNodeInfo* info = &(tree->gtLsraInfo); info->srcCount = 2; @@ -3900,13 +4431,18 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) GenTreeIntConCommon* imm = nullptr; GenTreePtr other = nullptr; - // There are three forms of x86 multiply: - // one-op form: RDX:RAX = RAX * r/m - // two-op form: reg *= r/m - // three-op form: reg = r/m * imm +// There are three forms of x86 multiply: +// one-op form: RDX:RAX = RAX * r/m +// two-op form: reg *= r/m +// three-op form: reg = r/m * imm - // This special widening 32x32->64 MUL is not used on x64 - assert((tree->gtFlags & GTF_MUL_64RSLT) == 0); +// This special widening 32x32->64 MUL is not used on x64 +#if defined(_TARGET_X86_) + if (tree->OperGet() != GT_MUL_LONG) +#endif + { + assert((tree->gtFlags & GTF_MUL_64RSLT) == 0); + } // Multiply should never be using small types assert(!varTypeIsSmall(tree->TypeGet())); @@ -3924,12 +4460,21 @@ void Lowering::SetMulOpCounts(GenTreePtr tree) info->setDstCandidates(m_lsra, RBM_RAX); hasImpliedFirstOperand = true; } - else if (tree->gtOper == GT_MULHI) + else if (tree->OperGet() == GT_MULHI) + { + // Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the + // upper 32 bits of the result set the destination candidate to REG_RDX. 
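The GT_MULHI case above pins the destination to EDX because the node produces only the upper half of a widening multiply (the EDX half of the EDX:EAX result). The scalar equivalent for the unsigned 32-bit case, as an illustrative sketch:

#include <cstdint>

uint32_t MulHi32(uint32_t a, uint32_t b)
{
    // Full 32x32 -> 64 multiply, keeping only the upper 32 bits.
    return (uint32_t)(((uint64_t)a * b) >> 32);
}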
+ info->setDstCandidates(m_lsra, RBM_RDX); + hasImpliedFirstOperand = true; + } +#if defined(_TARGET_X86_) + else if (tree->OperGet() == GT_MUL_LONG) { // have to use the encoding:RDX:RAX = RAX * rm info->setDstCandidates(m_lsra, RBM_RAX); hasImpliedFirstOperand = true; } +#endif else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1)) { if (IsContainableImmed(tree, op2)) @@ -4187,6 +4732,71 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree) return preferredOp; } +#ifdef _TARGET_X86_ +//------------------------------------------------------------------------ +// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for +// various reasons +// +// Arguments: +// tree - The node of interest +// +// Return Value: +// If we need to exclude non-byteable registers +// +bool Lowering::ExcludeNonByteableRegisters(GenTree* tree) +{ + // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr' + // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT + // value. In this case we need to exclude esi/edi from the src candidates of op2. + if (varTypeIsByte(tree)) + { + return true; + } + // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool. + else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) + { + return true; + } + else if (tree->OperIsCompare()) + { + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); + + // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses + // ubyte as the result of comparison and if the result needs to be materialized into a reg + // simply zero extend it to TYP_INT size. Here is an example of generated code: + // cmp dl, byte ptr[addr mode] + // movzx edx, dl + if (varTypeIsByte(op1) && varTypeIsByte(op2)) + { + return true; + } + // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses + // ubyte as the result of the comparison and if the result needs to be materialized into a reg + // simply zero extend it to TYP_INT size. + else if (varTypeIsByte(op1) && op2->IsCnsIntOrI()) + { + return true; + } + // Example4: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses + // ubyte as the result of the comparison and if the result needs to be materialized into a reg + // simply zero extend it to TYP_INT size. + else if (op1->IsCnsIntOrI() && varTypeIsByte(op2)) + { + return true; + } + else + { + return false; + } + } + else + { + return false; + } +} +#endif // _TARGET_X86_ + #endif // _TARGET_XARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 317b976e42..accfd6ee78 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -355,6 +355,33 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) } #ifdef DEBUG + +//---------------------------------------------------------------------------- +// getConstrainedRegMask: Returns new regMask which is the intersection of +// regMaskActual and regMaskConstraint if the new regMask has at least +// minRegCount registers, otherwise returns regMaskActual. +// +// Arguments: +// regMaskActual - regMask that needs to be constrained +// regMaskConstraint - regMask constraint that needs to be +// applied to regMaskActual +// minRegCount - Minimum number of regs that should be +// be present in new regMask. +// +// Return Value: +// New regMask that has minRegCount registers after instersection. 
+// Otherwise returns regMaskActual. +regMaskTP LinearScan::getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstraint, unsigned minRegCount) +{ + regMaskTP newMask = regMaskActual & regMaskConstraint; + if (genCountBits(newMask) >= minRegCount) + { + return newMask; + } + + return regMaskActual; +} + //------------------------------------------------------------------------ // stressLimitRegs: Given a set of registers, expressed as a register mask, reduce // them based on the current stress options. @@ -373,38 +400,46 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) { if (getStressLimitRegs() != LSRA_LIMIT_NONE) { + // The refPosition could be null, for example when called + // by getTempRegForResolution(). + int minRegCount = (refPosition != nullptr) ? refPosition->minRegCandidateCount : 1; + switch (getStressLimitRegs()) { case LSRA_LIMIT_CALLEE: - if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE) + if (!compiler->opts.compDbgEnC) { - mask &= RBM_CALLEE_SAVED; + mask = getConstrainedRegMask(mask, RBM_CALLEE_SAVED, minRegCount); } break; + case LSRA_LIMIT_CALLER: - if ((mask & RBM_CALLEE_TRASH) != RBM_NONE) - { - mask &= RBM_CALLEE_TRASH; - } - break; + { + mask = getConstrainedRegMask(mask, RBM_CALLEE_TRASH, minRegCount); + } + break; + case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask &= LsraLimitSmallIntSet; + mask = getConstrainedRegMask(mask, LsraLimitSmallIntSet, minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask &= LsraLimitSmallFPSet; + mask = getConstrainedRegMask(mask, LsraLimitSmallFPSet, minRegCount); } break; + default: unreached(); } + if (refPosition != nullptr && refPosition->isFixedRegRef) { mask |= refPosition->registerAssignment; } } + return mask; } #endif // DEBUG @@ -658,16 +693,13 @@ void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp) #endif // _TARGET_AMD64_ Interval* theInterval = rp->getInterval(); + #ifdef DEBUG regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp)); if (doReverseCallerCallee()) { - regMaskTP newAssignment = rp->registerAssignment; - newAssignment &= calleeSaveMask; - if (newAssignment != RBM_NONE) - { - rp->registerAssignment = newAssignment; - } + rp->registerAssignment = + getConstrainedRegMask(rp->registerAssignment, calleeSaveMask, rp->minRegCandidateCount); } else #endif // DEBUG @@ -777,6 +809,9 @@ RefPosition* LinearScan::newRefPosition( // mask - Set of valid registers for this RefPosition // multiRegIdx - register position if this RefPosition corresponds to a // multi-reg call node. +// minRegCount - Minimum number registers that needs to be ensured while +// constraining candidates for this ref position under +// LSRA stress. This is a DEBUG only arg. 
// // Return Value: // a new RefPosition @@ -786,7 +821,8 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, RefType theRefType, GenTree* theTreeNode, regMaskTP mask, - unsigned multiRegIdx /* = 0 */) + unsigned multiRegIdx /* = 0 */ + DEBUGARG(unsigned minRegCandidateCount /* = 1 */)) { #ifdef DEBUG if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType) @@ -843,6 +879,10 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, newRP->setMultiRegIdx(multiRegIdx); newRP->setAllocateIfProfitable(0); +#ifdef DEBUG + newRP->minRegCandidateCount = minRegCandidateCount; +#endif // DEBUG + associateRefPosWithInterval(newRP); DBEXEC(VERBOSE, newRP->dump()); @@ -1071,12 +1111,14 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif dumpTerse = (JitConfig.JitDumpTerseLsra() != 0); - #endif // DEBUG + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + #if ETW_EBP_FRAMED availableIntRegs &= ~RBM_FPBASE; #endif // ETW_EBP_FRAMED + availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; @@ -1272,6 +1314,7 @@ void LinearScan::setBlockSequence() bool addedInternalBlocks = false; verifiedAllBBs = false; + hasCriticalEdges = false; BasicBlock* nextBlock; for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock) { @@ -1288,6 +1331,13 @@ void LinearScan::setBlockSequence() blockInfo[block->bbNum].hasCriticalOutEdge = false; blockInfo[block->bbNum].weight = block->bbWeight; +#if TRACK_LSRA_STATS + blockInfo[block->bbNum].spillCount = 0; + blockInfo[block->bbNum].copyRegCount = 0; + blockInfo[block->bbNum].resolutionMovCount = 0; + blockInfo[block->bbNum].splitEdgeCount = 0; +#endif // TRACK_LSRA_STATS + if (block->GetUniquePred(compiler) == nullptr) { for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext) @@ -1296,6 +1346,7 @@ void LinearScan::setBlockSequence() if (predBlock->NumSucc(compiler) > 1) { blockInfo[block->bbNum].hasCriticalInEdge = true; + hasCriticalEdges = true; break; } else if (predBlock->bbJumpKind == BBJ_SWITCH) @@ -1321,6 +1372,7 @@ void LinearScan::setBlockSequence() if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr) { blockInfo[block->bbNum].hasCriticalOutEdge = true; + hasCriticalEdges = true; // We can stop checking now. checkForCriticalOutEdge = false; } @@ -1666,11 +1718,6 @@ void LinearScan::doLinearScan() compiler->codeGen->regSet.rsClearRegsModified(); - // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this - // before building the intervals and ref positions, because those objects will embed - // RBP in various register masks (like preferences) if RBP is allowed to be allocated. - setFrameType(); - initMaxSpill(); buildIntervals(); DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS)); @@ -1685,6 +1732,17 @@ void LinearScan::doLinearScan() resolveRegisters(); compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE); +#if TRACK_LSRA_STATS + if ((JitConfig.DisplayLsraStats() != 0) +#ifdef DEBUG + || VERBOSE +#endif + ) + { + dumpLsraStats(jitstdout); + } +#endif // TRACK_LSRA_STATS + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST)); compiler->compLSRADone = true; @@ -1892,6 +1950,8 @@ void LinearScan::identifyCandidates() // for vectors on Arm64, though the actual value may differ. 
VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler)); unsigned int floatVarCount = 0; unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT; @@ -1902,6 +1962,37 @@ void LinearScan::identifyCandidates() unsigned int largeVectorVarCount = 0; unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT; #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE +#if DOUBLE_ALIGN + unsigned refCntStk = 0; + unsigned refCntReg = 0; + unsigned refCntWtdReg = 0; + unsigned refCntStkParam = 0; // sum of ref counts for all stack based parameters + unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles + doDoubleAlign = false; + bool checkDoubleAlign = true; + if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts()) + { + checkDoubleAlign = false; + } + else + { + switch (compiler->getCanDoubleAlign()) + { + case MUST_DOUBLE_ALIGN: + doDoubleAlign = true; + checkDoubleAlign = false; + break; + case CAN_DOUBLE_ALIGN: + break; + case CANT_DOUBLE_ALIGN: + doDoubleAlign = false; + checkDoubleAlign = false; + break; + default: + unreached(); + } + } +#endif // DOUBLE_ALIGN for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++) { @@ -1911,6 +2002,32 @@ void LinearScan::identifyCandidates() Interval* newInt = newInterval(intervalType); newInt->setLocalNumber(lclNum, this); + +#if DOUBLE_ALIGN + if (checkDoubleAlign) + { + if (varDsc->lvIsParam && !varDsc->lvIsRegArg) + { + refCntStkParam += varDsc->lvRefCnt; + } + else if (!isRegCandidate(varDsc) || varDsc->lvDoNotEnregister) + { + refCntStk += varDsc->lvRefCnt; + if ((varDsc->lvType == TYP_DOUBLE) || + ((varTypeIsStruct(varDsc) && varDsc->lvStructDoubleAlign && + (compiler->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT)))) + { + refCntWtdStkDbl += varDsc->lvRefCntWtd; + } + } + else + { + refCntReg += varDsc->lvRefCnt; + refCntWtdReg += varDsc->lvRefCntWtd; + } + } +#endif // DOUBLE_ALIGN + if (varDsc->lvIsStructField) { newInt->isStructField = true; @@ -2095,6 +2212,24 @@ void LinearScan::identifyCandidates() } } +#if DOUBLE_ALIGN + if (checkDoubleAlign) + { + // TODO-CQ: Fine-tune this: + // In the legacy reg predictor, this runs after allocation, and then demotes any lclVars + // allocated to the frame pointer, which is probably the wrong order. + // However, because it runs after allocation, it can determine the impact of demoting + // the lclVars allocated to the frame pointer. + // => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess. + // + unsigned refCntEBP = refCntReg / 8; + unsigned refCntWtdEBP = refCntWtdReg / 8; + + doDoubleAlign = + compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl); + } +#endif // DOUBLE_ALIGN + // The factors we consider to determine which set of fp vars to use as candidates for callee save // registers current include the number of fp vars, whether there are loops, and whether there are // multiple exits. 
These have been selected somewhat empirically, but there is probably room for @@ -2510,6 +2645,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) break; case GT_MULHI: +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + case GT_MUL_LONG: +#endif killMask = RBM_RAX | RBM_RDX; break; @@ -2644,7 +2782,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) } break; -#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_) +#if defined(PROFILING_SUPPORTED) // If this method requires profiler ELT hook then mark these nodes as killing // callee trash registers (excluding RAX and XMM0). The reason for this is that // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for @@ -2660,10 +2798,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) if (compiler->compIsProfilerHookNeeded()) { killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL); - ; } break; -#endif // PROFILING_SUPPORTED && _TARGET_AMD64_ +#endif // PROFILING_SUPPORTED default: // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE @@ -2769,19 +2906,46 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo return false; } +//---------------------------------------------------------------------------- +// defineNewInternalTemp: Defines a ref position for an internal temp. +// +// Arguments: +// tree - Gentree node requiring an internal register +// regType - Register type +// currentLoc - Location of the temp Def position +// regMask - register mask of candidates for temp +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set under LSRA stress mode. This is a +// DEBUG only arg. RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, - regMaskTP regMask) + regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount)) { Interval* current = newInterval(regType); current->isInternal = true; - return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask); + return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask, 0 DEBUG_ARG(minRegCandidateCount)); } +//------------------------------------------------------------------------ +// buildInternalRegisterDefsForNode - build Def positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// currentLoc - Location at which Def positions need to be defined +// temps - in-out array which is populated with ref positions +// created for Def of internal registers +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set of ref positions under LSRA stress. This is +// a DEBUG only arg. +// +// Returns: +// The total number of Def positions created for internal registers of tree node. 
int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, - RefPosition* temps[]) // populates + RefPosition* temps[] // populates + DEBUGARG(unsigned minRegCandidateCount)) { int count; int internalIntCount = tree->gtLsraInfo.internalIntCount; @@ -2805,14 +2969,16 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, internalIntCands = genFindLowestBit(internalIntCands); internalCands &= ~internalIntCands; } - temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands); + temps[count] = + defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands DEBUG_ARG(minRegCandidateCount)); } int internalFloatCount = tree->gtLsraInfo.internalFloatCount; for (int i = 0; i < internalFloatCount; i++) { regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates()); - temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands); + temps[count++] = + defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands DEBUG_ARG(minRegCandidateCount)); } noway_assert(count < MaxInternalRegisters); @@ -2820,10 +2986,26 @@ int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree, return count; } +//------------------------------------------------------------------------ +// buildInternalRegisterUsesForNode - adds Use positions for internal +// registers required for tree node. +// +// Arguments: +// tree - Gentree node that needs internal registers +// currentLoc - Location at which Use positions need to be defined +// defs - int array containing Def positions of internal +// registers. +// total - Total number of Def positions in 'defs' array. +// minRegCandidateCount - Minimum registers to be ensured in candidate +// set of ref positions under LSRA stress. This is +// a DEBUG only arg. +// +// Returns: +// Void. void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], - int total) + int total DEBUGARG(unsigned minRegCandidateCount)) { assert(total < MaxInternalRegisters); @@ -2840,8 +3022,14 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree, } else { - RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask); - newest->lastUse = true; + RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask, + 0 DEBUG_ARG(minRegCandidateCount)); + newest->lastUse = true; + + if (tree->gtLsraInfo.isInternalRegDelayFree) + { + newest->delayRegFree = true; + } } } } @@ -3196,10 +3384,10 @@ static int ComputeOperandDstCount(GenTree* operand) // If an operand has no destination registers but does have source registers, it must be a store // or a compare. assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare()); + operand->OperIsCompare() || operand->IsSIMDEqualityOrInequality()); return 0; } - else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID)) + else if (!operand->OperIsFieldListHead() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID)) { // Stores and void-typed operands may be encountered when processing call nodes, which contain // pointers to argument setup stores. 
@@ -3207,7 +3395,7 @@ static int ComputeOperandDstCount(GenTree* operand) } else { - // If an aggregate or non-void-typed operand is not an unsued value and does not have source registers, + // If a field list or non-void-typed operand is not an unused value and does not have source registers, // that argument is contained within its parent and produces `sum(operand_dst_count)` registers. int dstCount = 0; for (GenTree* op : operand->Operands()) @@ -3254,16 +3442,14 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, assert(!isRegPairType(tree->TypeGet())); #endif // _TARGET_ARM_ - // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes + // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. + // GT_CLS_VAR nodes should have been eliminated by rationalizer. assert(tree->OperGet() != GT_ARGPLACE); - assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate()); + assert(tree->OperGet() != GT_LIST); + assert(tree->OperGet() != GT_CLS_VAR); - // These nodes are eliminated by the Rationalizer. - if (tree->OperGet() == GT_CLS_VAR) - { - JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet())); - assert(!"Unexpected node in LSRA."); - } + // The LIR traversal visits only the first node in a GT_FIELD_LIST. + assert((tree->OperGet() != GT_FIELD_LIST) || tree->AsFieldList()->IsFieldListHead()); // The set of internal temporary registers used by this node are stored in the // gtRsvdRegs register mask. Clear it out. @@ -3409,7 +3595,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, { // Get the location info for the register defined by the first operand. LocationInfoList operandDefs; - bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); + bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs); assert(found); // Since we only expect to consume one register, we should only have a single register to @@ -3503,7 +3689,51 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, // (i.e. the target is read-modify-write), preference the dst to op1. bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc; - if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && + +#if defined(DEBUG) && defined(_TARGET_X86_) + // On x86, `LSRA_LIMIT_CALLER` is too restrictive to allow the use of special put args: this stress mode + // leaves only three registers allocatable--eax, ecx, and edx--of which the latter two are also used for the + // first two integral arguments to a call. This can leave us with too few registers to succesfully allocate in + // situations like the following: + // + // t1026 = lclVar ref V52 tmp35 u:3 REG NA <l:$3a1, c:$98d> + // + // /--* t1026 ref + // t1352 = * putarg_reg ref REG NA + // + // t342 = lclVar int V14 loc6 u:4 REG NA $50c + // + // t343 = const int 1 REG NA $41 + // + // /--* t342 int + // +--* t343 int + // t344 = * + int REG NA $495 + // + // t345 = lclVar int V04 arg4 u:2 REG NA $100 + // + // /--* t344 int + // +--* t345 int + // t346 = * % int REG NA $496 + // + // /--* t346 int + // t1353 = * putarg_reg int REG NA + // + // t1354 = lclVar ref V52 tmp35 (last use) REG NA + // + // /--* t1354 ref + // t1355 = * lea(b+0) byref REG NA + // + // Here, the first `putarg_reg` would normally be considered a special put arg, which would remove `ecx` from the + // set of allocatable registers, leaving only `eax` and `edx`. 
The allocator will then fail to allocate a register + // for the def of `t345` if arg4 is not a register candidate: the corresponding ref position will be constrained to + // { `ecx`, `ebx`, `esi`, `edi` }, which `LSRA_LIMIT_CALLER` will further constrain to `ecx`, which will not be + // available due to the special put arg. + const bool supportsSpecialPutArg = getStressLimitRegs() != LSRA_LIMIT_CALLER; +#else + const bool supportsSpecialPutArg = true; +#endif + + if (supportsSpecialPutArg && tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) && (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0) { // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use, @@ -3525,9 +3755,17 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, RefPosition* internalRefs[MaxInternalRegisters]; +#ifdef DEBUG + // Number of registers required for tree node is the sum of + // consume + produce + internalCount. This is the minimum + // set of registers that needs to be ensured in candidate + // set of ref positions created. + unsigned minRegCount = consume + produce + info.internalIntCount + info.internalFloatCount; +#endif // DEBUG + // make intervals for all the 'internal' register requirements for this node // where internal means additional registers required temporarily - int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs); + int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs DEBUG_ARG(minRegCount)); // pop all ref'd tree temps GenTreeOperandIterator iterator = tree->OperandsBegin(); @@ -3632,6 +3870,37 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, candidates = fixedAssignment; } +#ifdef DEBUG + // If delayRegFree, then Use will interfere with the destination of + // the consuming node. Therefore, we also need add the kill set of + // consuming node to minRegCount. + // + // For example consider the following IR on x86, where v01 and v02 + // are method args coming in ecx and edx respectively. + // GT_DIV(v01, v02) + // + // For GT_DIV minRegCount will be 3 without adding kill set + // of GT_DIV node. + // + // Assume further JitStressRegs=2, which would constrain + // candidates to callee trashable regs { eax, ecx, edx } on + // use positions of v01 and v02. LSRA allocates ecx for v01. + // Use position of v02 cannot be allocated a regs since it + // is marked delay-reg free and {eax,edx} are getting killed + // before the def of GT_DIV. For this reason, minRegCount + // for Use position of v02 also needs to take into account + // of kill set of its consuming node. 
+ unsigned minRegCountForUsePos = minRegCount; + if (delayRegFree) + { + regMaskTP killMask = getKillSetForNode(tree); + if (killMask != RBM_NONE) + { + minRegCountForUsePos += genCountBits(killMask); + } + } +#endif // DEBUG + RefPosition* pos; if ((candidates & allRegs(i->registerType)) == 0) { @@ -3645,13 +3914,16 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, regNumber physicalReg = genRegNumFromMask(fixedAssignment); RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment); } - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), + multiRegIdx DEBUG_ARG(minRegCountForUsePos)); pos->registerAssignment = candidates; } else { - pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx); + pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, + multiRegIdx DEBUG_ARG(minRegCountForUsePos)); } + if (delayRegFree) { hasDelayFreeSrc = true; @@ -3675,7 +3947,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, listNodePool.ReturnNodes(operandDefs); } - buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount); + buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount DEBUG_ARG(minRegCount)); RegisterType registerType = getDefType(tree); regMaskTP candidates = getDefCandidates(tree); @@ -3708,7 +3980,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, { // Build RefPositions for saving any live large vectors. // This must be done after the kills, so that we know which large vectors are still live. - VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc)); + VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc + 1)); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -3779,7 +4051,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i)); } - RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i); + RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, + (unsigned)i DEBUG_ARG(minRegCount)); if (info.isLocalDefUse) { pos->isLocalDefUse = true; @@ -3791,11 +4064,12 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors); + // SaveDef position must be at the same location as Def position of call node. + buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - bool isContainedNode = - !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore())); + bool isContainedNode = !noAdd && consume == 0 && produce == 0 && + (tree->OperIsFieldListHead() || ((tree->TypeGet() != TYP_VOID) && !tree->OperIsStore())); if (isContainedNode) { // Contained nodes map to the concatenated lists of their operands. 
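The DEBUG-only minRegCount used above is plain arithmetic: the registers a node consumes, produces, and needs internally, plus, for a delay-reg-free use, the registers killed by the consuming node. A small self-contained sketch of that computation, with a 64-bit integer standing in for regMaskTP, a popcount standing in for genCountBits, and a made-up mask value in main:

// Sketch only: regMaskTP is modeled as a 64-bit mask; the mask below is invented.
#include <cstdint>
#include <cstdio>

using RegMask = uint64_t;

static unsigned countBits(RegMask mask) // stand-in for genCountBits
{
    unsigned count = 0;
    while (mask != 0)
    {
        mask &= (mask - 1); // clear the lowest set bit
        count++;
    }
    return count;
}

// Minimum number of registers that must remain allocatable for a node under LSRA stress:
// everything it reads, everything it writes, its internal temps, and - for a use that is
// delay-reg-free - the registers killed by the consuming node, since those cannot be reused.
static unsigned minRegCountForNode(
    unsigned consume, unsigned produce, unsigned internalCount, bool delayRegFree, RegMask killMask)
{
    unsigned minRegCount = consume + produce + internalCount;
    if (delayRegFree && (killMask != 0))
    {
        minRegCount += countBits(killMask);
    }
    return minRegCount;
}

int main()
{
    // E.g. a two-operand node producing one value with no internal temps, whose consumer
    // kills two registers (think eax/edx for a division on x86).
    printf("%u\n", minRegCountForNode(2, 1, 0, true, 0x5 /* two bits set */));
    return 0;
}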
@@ -3852,6 +4126,22 @@ BasicBlock* getNonEmptyBlock(BasicBlock* block) return block; } +//------------------------------------------------------------------------ +// insertZeroInitRefPositions: Handle lclVars that are live-in to the first block +// +// Notes: +// For each lclVar that is live-in to the first block: +// - If it is a GC ref, or if compInitMem is set, a ZeroInit RefPosition will be created. +// - Otherwise, it will be marked as spilled, since it will not be assigned a register +// on entry and will be loaded from memory on the undefined path. +// Note that, when the compInitMem option is not set, we may encounter these on +// paths that are protected by the same condition as an earlier def. However, since +// we don't do the analysis to determine this - and couldn't rely on always identifying +// such cases even if we tried - we must conservatively treat the undefined path as +// being possible. This is a relatively rare case, so the introduced conservatism is +// not expected to warrant the analysis required to determine the best placement of +// an initialization. +// void LinearScan::insertZeroInitRefPositions() { // insert defs for this, then a block boundary @@ -3861,15 +4151,23 @@ void LinearScan::insertZeroInitRefPositions() { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; LclVarDsc* varDsc = compiler->lvaTable + varNum; - if (!varDsc->lvIsParam && isCandidateVar(varDsc) && - (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))) + if (!varDsc->lvIsParam && isCandidateVar(varDsc)) { - GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); - JITDUMP("V%02u was live in\n", varNum); - Interval* interval = getIntervalForLocalVar(varNum); - RefPosition* pos = - newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); - varDsc->lvMustInit = true; + JITDUMP("V%02u was live in to first block:", varNum); + Interval* interval = getIntervalForLocalVar(varNum); + if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())) + { + JITDUMP(" creating ZeroInit\n"); + GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode(); + RefPosition* pos = + newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType)); + varDsc->lvMustInit = true; + } + else + { + setIntervalAsSpilled(interval); + JITDUMP(" marking as spilled\n"); + } } } } @@ -4131,8 +4429,20 @@ void LinearScan::buildIntervals() } #endif // DEBUG +#if DOUBLE_ALIGN + // We will determine whether we should double align the frame during + // identifyCandidates(), but we initially assume that we will not. + doDoubleAlign = false; +#endif + identifyCandidates(); + // Figure out if we're going to use a frame pointer. We need to do this before building + // the ref positions, because those objects will embed the frame register in various register masks + // if the frame pointer is not reserved. If we decide to have a frame pointer, setFrameType() will + // remove the frame pointer from the masks. + setFrameType(); + DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE)); // second part: @@ -4263,6 +4573,9 @@ void LinearScan::buildIntervals() insertZeroInitRefPositions(); } + // Any lclVars live-in to a block are resolution candidates. + VarSetOps::UnionD(compiler, resolutionCandidateVars, block->bbLiveIn); + // Determine if we need any DummyDefs. // We need DummyDefs for cases where "predBlock" isn't really a predecessor. 
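The insertZeroInitRefPositions change above boils down to a per-variable decision for lclVars live into the first block: GC refs, or anything when compInitMem is set, get a ZeroInit RefPosition; everything else is conservatively marked as spilled. A minimal sketch of that decision, using invented names (LclVarInfo, classifyLiveInVar) rather than the JIT's real descriptors:

// Illustrative only: simplified stand-ins for the JIT's lclVar descriptor and decision.
#include <cstdio>

enum class LiveInAction { None, ZeroInit, MarkSpilled };

struct LclVarInfo
{
    bool isParam;        // parameters are defined on entry, so no zero-init is needed
    bool isRegCandidate; // only register candidates are handled here
    bool isGCRef;        // GC refs must not contain garbage on any path
};

// Mirrors the shape of the decision above (a sketch, not the real code).
LiveInAction classifyLiveInVar(const LclVarInfo& v, bool compInitMem)
{
    if (v.isParam || !v.isRegCandidate)
    {
        return LiveInAction::None;
    }
    if (compInitMem || v.isGCRef)
    {
        // A ZeroInit RefPosition is created; the var must be initialized on entry.
        return LiveInAction::ZeroInit;
    }
    // Otherwise the interval is conservatively treated as spilled: it gets no register
    // on entry and is loaded from memory on the (possibly undefined) path.
    return LiveInAction::MarkSpilled;
}

int main()
{
    LclVarInfo objRef{false, true, true};
    LclVarInfo plainInt{false, true, false};
    printf("objRef   -> %d\n", (int)classifyLiveInVar(objRef, false));   // ZeroInit
    printf("plainInt -> %d\n", (int)classifyLiveInVar(plainInt, false)); // MarkSpilled
    return 0;
}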
// Note that it's possible to have uses of unitialized variables, in which case even the first @@ -4274,8 +4587,8 @@ void LinearScan::buildIntervals() VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn); if (predBlock) { - JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum, - predBlock->bbNum); + JITDUMP("\n\nSetting BB%02u as the predecessor for determining incoming variable registers of BB%02u\n", + block->bbNum, predBlock->bbNum); assert(predBlock->bbNum <= bbNumMaxBeforeResolution); blockInfo[block->bbNum].predBBNum = predBlock->bbNum; // Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut @@ -4534,7 +4847,16 @@ void LinearScan::validateIntervals() void LinearScan::setFrameType() { FrameType frameType = FT_NOT_SET; - if (compiler->codeGen->isFramePointerRequired()) +#if DOUBLE_ALIGN + compiler->codeGen->setDoubleAlign(false); + if (doDoubleAlign) + { + frameType = FT_DOUBLE_ALIGN_FRAME; + compiler->codeGen->setDoubleAlign(true); + } + else +#endif // DOUBLE_ALIGN + if (compiler->codeGen->isFramePointerRequired()) { frameType = FT_EBP_FRAME; } @@ -4563,22 +4885,6 @@ void LinearScan::setFrameType() } } -#if DOUBLE_ALIGN - // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the - // frame if needed. Note that this feature isn't on for amd64, because the stack is - // always double-aligned by default. - compiler->codeGen->setDoubleAlign(false); - - // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to - // determine whether to double-align). Note, though that there is at least one test - // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set - // in certain situations. - if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed) - { - frameType = FT_DOUBLE_ALIGN_FRAME; - } -#endif // DOUBLE_ALIGN - switch (frameType) { case FT_ESP_FRAME: @@ -4593,7 +4899,6 @@ void LinearScan::setFrameType() case FT_DOUBLE_ALIGN_FRAME: noway_assert(!compiler->codeGen->isFramePointerRequired()); compiler->codeGen->setFramePointerUsed(false); - compiler->codeGen->setDoubleAlign(true); break; #endif // DOUBLE_ALIGN default: @@ -4625,11 +4930,11 @@ void LinearScan::setFrameType() compiler->rpFrameType = frameType; } -// Is the copyReg given by this RefPosition still busy at the +// Is the copyReg/moveReg given by this RefPosition still busy at the // given location? -bool copyRegInUse(RefPosition* ref, LsraLocation loc) +bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc) { - assert(ref->copyReg); + assert(ref->copyReg || ref->moveReg); if (ref->getRefEndLocation() >= loc) { return true; @@ -4689,14 +4994,15 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, return false; } - // Is this a copyReg? It is if the register assignment doesn't match. - // (the recentReference may not be a copyReg, because we could have seen another - // reference since the copyReg) + // Is this a copyReg/moveReg? It is if the register assignment doesn't match. 
+ // (the recentReference may not be a copyReg/moveReg, because we could have seen another + // reference since the copyReg/moveReg) if (!assignedInterval->isAssignedTo(physRegRecord->regNum)) { // Don't reassign it if it's still in use - if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc)) + if ((recentReference->copyReg || recentReference->moveReg) && + copyOrMoveRegInUse(recentReference, currentLoc)) { return false; } @@ -5393,8 +5699,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio // to remain live until the use, we should set the candidates to allRegs(regType) // to avoid a spill - codegen can then insert the copy. assert(candidates == candidateBit); - physRegNextLocation = MaxLocation; - farthestRefPosWeight = BB_MAX_WEIGHT; + + // If a refPosition has a fixed reg as its candidate and is also marked + // as allocateIfProfitable, we should allocate fixed reg only if the + // weight of this ref position is greater than the weight of the ref + // position to which fixed reg is assigned. Such a case would arise + // on x86 under LSRA stress. + if (!allocateIfProfitable) + { + physRegNextLocation = MaxLocation; + farthestRefPosWeight = BB_MAX_WEIGHT; + } } else { @@ -5487,13 +5802,14 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio } } - LsraLocation nextLocation = assignedInterval->getNextRefLocation(); + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + LsraLocation nextLocation = assignedInterval->getNextRefLocation(); // We should never spill a register that's occupied by an Interval with its next use at the current location. // Normally this won't occur (unless we actually had more uses in a single node than there are registers), // because we'll always find something with a later nextLocation, but it can happen in stress when // we have LSRA_SELECT_NEAREST. - if ((nextLocation == refLocation) && !refPosition->isFixedRegRef) + if ((nextLocation == refLocation) && !refPosition->isFixedRegRef && nextRefPosition->RequiresRegister()) { continue; } @@ -5578,7 +5894,17 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio else { // Must have found a spill candidate. - assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef)); + assert(farthestRefPhysRegRecord != nullptr); + if ((farthestLocation == refLocation) && !refPosition->isFixedRegRef) + { + Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval; + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + assert(!nextRefPosition->RequiresRegister()); + } + else + { + assert(farthestLocation > refLocation || refPosition->isFixedRegRef); + } } #endif @@ -5699,6 +6025,70 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) } //------------------------------------------------------------------------ +// setIntervalAsSplit: Set this Interval as being split +// +// Arguments: +// interval - The Interval which is being split +// +// Return Value: +// None. +// +// Notes: +// The given Interval will be marked as split, and it will be added to the +// set of splitOrSpilledVars. +// +// Assumptions: +// "interval" must be a lclVar interval, as tree temps are never split. +// This is asserted in the call to getVarIndex(). 
+// +void LinearScan::setIntervalAsSplit(Interval* interval) +{ + if (interval->isLocalVar) + { + unsigned varIndex = interval->getVarIndex(compiler); + if (!interval->isSplit) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex); + } + else + { + assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex)); + } + } + interval->isSplit = true; +} + +//------------------------------------------------------------------------ +// setIntervalAsSpilled: Set this Interval as being spilled +// +// Arguments: +// interval - The Interval which is being spilled +// +// Return Value: +// None. +// +// Notes: +// The given Interval will be marked as spilled, and it will be added +// to the set of splitOrSpilledVars. +// +void LinearScan::setIntervalAsSpilled(Interval* interval) +{ + if (interval->isLocalVar) + { + unsigned varIndex = interval->getVarIndex(compiler); + if (!interval->isSpilled) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, varIndex); + } + else + { + assert(VarSetOps::IsMember(compiler, splitOrSpilledVars, varIndex)); + } + } + interval->isSpilled = true; +} + +//------------------------------------------------------------------------ // spill: Spill this Interval between "fromRefPosition" and "toRefPosition" // // Arguments: @@ -5739,8 +6129,10 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, } #endif // DEBUG - interval->isActive = false; - interval->isSpilled = true; + INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum)); + + interval->isActive = false; + setIntervalAsSpilled(interval); // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack // on entry to this block. @@ -5923,7 +6315,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK); if (spillRefPosition->nextRefPosition != nullptr) { - assignedInterval->isSpilled = true; + setIntervalAsSpilled(assignedInterval); } } else @@ -5945,7 +6337,8 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { assignedInterval->assignedReg = regRec; } - else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec && + else if (regRec->previousInterval != nullptr && regRec->previousInterval != assignedInterval && + regRec->previousInterval->assignedReg == regRec && regRec->previousInterval->getNextRefPosition() != nullptr) { regRec->assignedInterval = regRec->previousInterval; @@ -6128,7 +6521,14 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool alloc if (allocationPass) { targetReg = predVarToRegMap[varIndex]; - INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs))); +#ifdef DEBUG + regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)); + if (newTargetReg != targetReg) + { + targetReg = newTargetReg; + setIntervalAsSplit(interval); + } +#endif // DEBUG inVarToRegMap[varIndex] = targetReg; } else // !allocationPass (i.e. resolution/write-back pass) @@ -6686,6 +7086,7 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); didDump = true; allocate = false; + setIntervalAsSpilled(currentInterval); } // If it has no actual references, mark it as "lastUse"; since they're not actually part // of any flow they won't have been marked during dataflow. 
Otherwise, if we allocate a @@ -6912,6 +7313,7 @@ void LinearScan::allocateRegisters() } currentRefPosition->moveReg = true; assignedRegister = REG_NA; + setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0) @@ -6936,65 +7338,47 @@ void LinearScan::allocateRegisters() } else { - // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use. - - assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef || - currentInterval->hasConflictingDefUse)); + assert(currentInterval != nullptr); // It's already in a register, but not one we need. - // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that - // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy. - // The reason we need special handling for the "delayRegFree" case is that we need to mark the - // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement). - // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs - // if it is a last use. - if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree) - { - if (!RefTypeIsDef(currentRefPosition->refType)) + if (!RefTypeIsDef(currentRefPosition->refType)) + { + regNumber copyReg = assignCopyReg(currentRefPosition); + assert(copyReg != REG_NA); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); + lastAllocatedRefPosition = currentRefPosition; + if (currentRefPosition->lastUse) { - regNumber copyReg = assignCopyReg(currentRefPosition); - assert(copyReg != REG_NA); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); - lastAllocatedRefPosition = currentRefPosition; - if (currentRefPosition->lastUse) + if (currentRefPosition->delayRegFree) { - if (currentRefPosition->delayRegFree) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, - assignedRegister)); - delayRegsToFree |= - (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); - } - else - { - INDEBUG( - dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); - regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); - } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, + assignedRegister)); + delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); } - // If this is a tree temp (non-localVar) interval, we will need an explicit move. - if (!currentInterval->isLocalVar) + else { - currentRefPosition->moveReg = true; - currentRefPosition->copyReg = false; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); + regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); } - continue; } - else + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + if (!currentInterval->isLocalVar) { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= genRegMask(assignedRegister); - // We want a new register, but we don't want this to be considered a spill. 
- assignedRegister = REG_NA; - if (physRegRecord->assignedInterval == currentInterval) - { - unassignPhysRegNoSpill(physRegRecord); - } + currentRefPosition->moveReg = true; + currentRefPosition->copyReg = false; } + continue; } else { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); + regsToFree |= genRegMask(assignedRegister); + // We want a new register, but we don't want this to be considered a spill. + assignedRegister = REG_NA; + if (physRegRecord->assignedInterval == currentInterval) + { + unassignPhysRegNoSpill(physRegRecord); + } } } } @@ -7031,23 +7415,39 @@ void LinearScan::allocateRegisters() // then find a register to spill if (assignedRegister == REG_NA) { -#ifdef FEATURE_SIMD +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (refType == RefTypeUpperVectorSaveDef) { // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable. - currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH); - assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + + // SaveDef position occurs after the Use of args and at the same location as Kill/Def + // positions of a call node. But SaveDef position cannot use any of the arg regs as + // they are needed for call node. + currentRefPosition->registerAssignment = + (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS); + assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + // There MUST be caller-save registers available, because they have all just been killed. + // Amd64 Windows: xmm4-xmm5 are guaranteed to be available as xmm0-xmm3 are used for passing args. + // Amd64 Unix: xmm8-xmm15 are guaranteed to be avilable as xmm0-xmm7 are used for passing args. + // X86 RyuJIT Windows: xmm4-xmm7 are guanrateed to be available. assert(assignedRegister != REG_NA); + // Now, spill it. - // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill). + // Note: + // i) The reason we have to spill is that SaveDef position is allocated after the Kill positions + // of the call node are processed. Since callee-trash registers are killed by call node + // we explicity spill and unassign the register. + // ii) These will look a bit backward in the dump, but it's a pain to dump the alloc before the + // spill). unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister)); + // Now set assignedRegister to REG_NA again so that we don't re-activate it. 
assignedRegister = REG_NA; } else -#endif // FEATURE_SIMD +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable()) { if (allocateReg) @@ -7069,6 +7469,7 @@ void LinearScan::allocateRegisters() currentRefPosition->registerAssignment = RBM_NONE; currentRefPosition->reload = false; + setIntervalAsSpilled(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); } @@ -7078,6 +7479,7 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); currentRefPosition->registerAssignment = RBM_NONE; currentInterval->isActive = false; + setIntervalAsSpilled(currentInterval); } } #ifdef DEBUG @@ -7224,7 +7626,7 @@ void LinearScan::allocateRegisters() // - interval->physReg is set to the assigned register // (i.e. at the code location which is currently being handled by resolveRegisters()) // - interval->isActive is true iff the interval is live and occupying a register -// - interval->isSpilled is set to true if the interval is EVER spilled +// - interval->isSpilled should have already been set to true if the interval is EVER spilled // - interval->isSplit is set to true if the interval does not occupy the same // register throughout the method // - RegRecord->assignedInterval points to the interval which currently occupies @@ -7264,9 +7666,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi if (currentRefPosition->registerAssignment == RBM_NONE) { assert(!currentRefPosition->RequiresRegister()); + assert(interval->isSpilled); - interval->isSpilled = true; - varDsc->lvRegNum = REG_STK; + varDsc->lvRegNum = REG_STK; if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval) { interval->assignedReg->assignedInterval = nullptr; @@ -7314,8 +7716,10 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi // In the reload case we simply do not set GTF_REG_VAL, and it gets // referenced from the variable's home location. // This is also true for a pure def which is spilled. - if (reload && currentRefPosition->refType != RefTypeDef) + if (reload) { + assert(currentRefPosition->refType != RefTypeDef); + assert(interval->isSpilled); varDsc->lvRegNum = REG_STK; if (!spillAfter) { @@ -7353,31 +7757,15 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { assert(currentRefPosition->refType == RefTypeExpUse); } - - // If we have an undefined use set it as non-reg - if (!interval->isSpilled) - { - if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition) - { - // Parameters are the only thing that can be used before defined - } - else - { - // if we see a use before def of something else, the zero init flag better not be set. - noway_assert(!compiler->info.compInitMem); - // if it is not set, then the behavior is undefined but we don't want to crash or assert - interval->isSpilled = true; - } - } } else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType)) { // In the case of a pure def, don't bother spilling - just assign it to the // stack. However, we need to remember that it was spilled. 
- interval->isSpilled = true; - varDsc->lvRegNum = REG_STK; - interval->physReg = REG_NA; + assert(interval->isSpilled); + varDsc->lvRegNum = REG_STK; + interval->physReg = REG_NA; if (treeNode != nullptr) { treeNode->gtRegNum = REG_NA; @@ -7409,6 +7797,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi } else { + assert(interval->isSplit); interval->physReg = assignedReg; } @@ -7426,13 +7815,11 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { if (varDsc->lvRegNum != REG_STK) { - // If the register assignments don't match, then this interval is spilt, - // but not spilled (yet) - // However, we don't have a single register assignment now + // If the register assignments don't match, then this interval is split. if (varDsc->lvRegNum != assignedReg) { - interval->isSplit = TRUE; - varDsc->lvRegNum = REG_STK; + setIntervalAsSplit(interval); + varDsc->lvRegNum = REG_STK; } } else @@ -7447,9 +7834,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi { treeNode->gtFlags |= GTF_SPILL; } - interval->isSpilled = true; - interval->physReg = REG_NA; - varDsc->lvRegNum = REG_STK; + assert(interval->isSpilled); + interval->physReg = REG_NA; + varDsc->lvRegNum = REG_STK; } // This value is in a register, UNLESS we already saw this treeNode @@ -7489,6 +7876,7 @@ void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree) // than the one it was spilled from (GT_RELOAD). // // Arguments: +// block - basic block in which GT_COPY/GT_RELOAD is inserted. // tree - This is the node to copy or reload. // Insert copy or reload node between this node and its parent. // multiRegIdx - register position of tree node for which copy or reload is needed. @@ -7557,6 +7945,10 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned else { oper = GT_COPY; + +#if TRACK_LSRA_STATS + updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum); +#endif } // If the parent is a reload/copy node, then tree must be a multi-reg call node @@ -8100,7 +8492,7 @@ void LinearScan::resolveRegisters() { JITDUMP(" internal"); GenTreePtr indNode = nullptr; - if (treeNode->OperIsIndir()) + if (treeNode->OperGet() == GT_IND) { indNode = treeNode; JITDUMP(" allocated at GT_IND"); @@ -8223,6 +8615,11 @@ void LinearScan::resolveRegisters() printf("RESOLVING BB BOUNDARIES\n"); printf("-----------------------\n"); + printf("Resolution Candidates: "); + dumpConvertedVarSet(compiler, resolutionCandidateVars); + printf("\n"); + printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No"); + printf("Prior to Resolution\n"); foreach_block(compiler, block) { @@ -8282,23 +8679,10 @@ void LinearScan::resolveRegisters() varDsc->lvArgInitReg = initialReg; JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg)); } - if (!varDsc->lvIsRegArg) - { - // stack arg - if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) - { - if (sourceReg != initialReg) - { - // The code generator won't initialize struct - // fields, so we have to do that if it's not already - // where it belongs. - assert(interval->isStructField); - JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg), - getRegName(initialReg)); - insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg); - } - } - } + + // Stack args that are part of dependently-promoted structs should never be register candidates (see + // LinearScan::isRegCandidate). 
+ assert(varDsc->lvIsRegArg || !compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)); } // If lvRegNum is REG_STK, that means that either no register @@ -8347,8 +8731,8 @@ void LinearScan::resolveRegisters() } if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter) { - // Either this RefPosition is spilled, or it is not a "real" def or use - assert(firstRefPosition->spillAfter || + // Either this RefPosition is spilled, or regOptional or it is not a "real" def or use + assert(firstRefPosition->spillAfter || firstRefPosition->AllocateIfProfitable() || (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse)); varDsc->lvRegNum = REG_STK; } @@ -8432,6 +8816,8 @@ void LinearScan::insertMove( BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg) { LclVarDsc* varDsc = compiler->lvaTable + lclNum; + // the lclVar must be a register candidate + assert(isRegCandidate(varDsc)); // One or both MUST be a register assert(fromReg != REG_STK || toReg != REG_STK); // They must not be the same register. @@ -8440,20 +8826,22 @@ void LinearScan::insertMove( // This var can't be marked lvRegister now varDsc->lvRegNum = REG_STK; - var_types lclTyp = varDsc->TypeGet(); - if (varDsc->lvNormalizeOnStore()) - { - lclTyp = genActualType(lclTyp); - } - GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp); + GenTreePtr src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); src->gtLsraInfo.isLsraAdded = true; - GenTreePtr top; - // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED - // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL - // Finally, for a reg-to-reg move, generate a GT_COPY + // There are three cases we need to handle: + // - We are loading a lclVar from the stack. + // - We are storing a lclVar to the stack. + // - We are copying a lclVar between registers. + // + // In the first and second cases, the lclVar node will be marked with GTF_SPILLED and GTF_SPILL, respectively. + // It is up to the code generator to ensure that any necessary normalization is done when loading or storing the + // lclVar's value. + // + // In the third case, we generate GT_COPY(GT_LCL_VAR) and type each node with the normalized type of the lclVar. + // This is safe because a lclVar is always normalized once it is in a register. - top = src; + GenTree* dst = src; if (fromReg == REG_STK) { src->gtFlags |= GTF_SPILLED; @@ -8467,21 +8855,22 @@ void LinearScan::insertMove( } else { - top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src); + var_types movType = genActualType(varDsc->TypeGet()); + src->gtType = movType; + + dst = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, movType, src); // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag. // Note that if src is itself a lastUse, this will have no effect. 
- top->gtFlags &= ~(GTF_VAR_DEATH); + dst->gtFlags &= ~(GTF_VAR_DEATH); src->gtRegNum = fromReg; src->SetInReg(); - top->gtRegNum = toReg; - src->gtNext = top; - top->gtPrev = src; + dst->gtRegNum = toReg; src->gtLsraInfo.isLocalDefUse = false; - top->gtLsraInfo.isLsraAdded = true; + dst->gtLsraInfo.isLsraAdded = true; } - top->gtLsraInfo.isLocalDefUse = true; + dst->gtLsraInfo.isLocalDefUse = true; - LIR::Range treeRange = LIR::SeqTree(compiler, top); + LIR::Range treeRange = LIR::SeqTree(compiler, dst); LIR::Range& blockRange = LIR::AsRange(block); if (insertionPoint != nullptr) @@ -8497,7 +8886,7 @@ void LinearScan::insertMove( noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE || + assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); blockRange.InsertBefore(branch, std::move(treeRange)); @@ -8568,7 +8957,7 @@ void LinearScan::insertSwap( noway_assert(!blockRange.IsEmpty()); GenTree* branch = blockRange.LastNode(); - assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE || + assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); blockRange.InsertBefore(branch, std::move(swapRange)); @@ -8682,12 +9071,15 @@ void LinearScan::addResolution( insertMove(block, insertionPoint, interval->varNum, fromReg, toReg); if (fromReg == REG_STK || toReg == REG_STK) { - interval->isSpilled = true; + assert(interval->isSpilled); } else { - interval->isSplit = true; + // We should have already marked this as spilled or split. + assert((interval->isSpilled) || (interval->isSplit)); } + + INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum)); } //------------------------------------------------------------------------ @@ -8706,6 +9098,12 @@ void LinearScan::addResolution( void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { + VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet, + VarSetOps::Intersection(compiler, block->bbLiveOut, resolutionCandidateVars)); + if (VarSetOps::IsEmpty(compiler, outResolutionSet)) + { + return; + } VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler)); @@ -8720,6 +9118,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // First, determine the live regs at the end of this block so that we know what regs are // available to copy into. + // Note that for this purpose we use the full live-out set, because we must ensure that + // even the registers that remain the same across the edge are preserved correctly. 
regMaskTP liveOutRegs = RBM_NONE; VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1); while (iter1.NextElem(compiler, &varIndex1)) @@ -8755,7 +9155,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) regMaskTP sameWriteRegs = RBM_NONE; regMaskTP diffReadRegs = RBM_NONE; - // For each var, classify them as: + // For each var that may require resolution, classify them as: // - in the same register at the end of this block and at each target (no resolution needed) // - in different registers at different targets (resolve separately): // diffResolutionSet @@ -8764,7 +9164,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // write to any registers that are read by those in the diffResolutionSet: // sameResolutionSet - VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex); + VARSET_ITER_INIT(compiler, iter, outResolutionSet, varIndex); while (iter.NextElem(compiler, &varIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; @@ -8936,6 +9336,16 @@ void LinearScan::resolveEdges() { JITDUMP("RESOLVING EDGES\n"); + // The resolutionCandidateVars set was initialized with all the lclVars that are live-in to + // any block. We now intersect that set with any lclVars that ever spilled or split. + // If there are no candidates for resoultion, simply return. + + VarSetOps::IntersectionD(compiler, resolutionCandidateVars, splitOrSpilledVars); + if (VarSetOps::IsEmpty(compiler, resolutionCandidateVars)) + { + return; + } + BasicBlock *block, *prevBlock = nullptr; // Handle all the critical edges first. @@ -8944,18 +9354,21 @@ void LinearScan::resolveEdges() // remaining mismatches. We visit the out-edges, as that allows us to share the moves that are // common among allt he targets. - foreach_block(compiler, block) + if (hasCriticalEdges) { - if (block->bbNum > bbNumMaxBeforeResolution) - { - // This is a new block added during resolution - we don't need to visit these now. - continue; - } - if (blockInfo[block->bbNum].hasCriticalOutEdge) + foreach_block(compiler, block) { - handleOutgoingCriticalEdges(block); + if (block->bbNum > bbNumMaxBeforeResolution) + { + // This is a new block added during resolution - we don't need to visit these now. + continue; + } + if (blockInfo[block->bbNum].hasCriticalOutEdge) + { + handleOutgoingCriticalEdges(block); + } + prevBlock = block; } - prevBlock = block; } prevBlock = nullptr; @@ -8975,7 +9388,9 @@ void LinearScan::resolveEdges() // we may need resolution at the beginning of this block. // This may be true even if it's the block we used for starting locations, // if a variable was spilled. 
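resolveEdges above narrows its work in two steps: resolutionCandidateVars (vars live into some block) is intersected with splitOrSpilledVars, and each edge then only considers candidates live into the target block. A small sketch of that narrowing, using std::bitset in place of VARSET_TP and arbitrary variable indices:

// Sketch of the candidate-narrowing idea, not the JIT's VarSetOps implementation.
#include <bitset>
#include <cstdio>

constexpr size_t kMaxTrackedVars = 64;
using VarSet = std::bitset<kMaxTrackedVars>;

int main()
{
    VarSet liveInAnywhere; // union of bbLiveIn over all blocks, built while building intervals
    VarSet splitOrSpilled; // vars that were ever split or spilled during allocation

    liveInAnywhere.set(3).set(7).set(12);
    splitOrSpilled.set(7).set(20);

    // Only vars that are both live across some block boundary and were split/spilled
    // can possibly need a resolution move; everything else is skipped entirely.
    VarSet resolutionCandidates = liveInAnywhere & splitOrSpilled;
    if (resolutionCandidates.none())
    {
        printf("no resolution needed\n");
        return 0;
    }

    // Per edge, the work is further limited to candidates live into the target block.
    VarSet succLiveIn;
    succLiveIn.set(7).set(12);
    VarSet edgeWork = resolutionCandidates & succLiveIn;
    printf("vars needing resolution on this edge: %zu\n", edgeWork.count());
    return 0;
}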
- if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn)) + VARSET_TP VARSET_INIT_NOCOPY(inResolutionSet, + VarSetOps::Intersection(compiler, block->bbLiveIn, resolutionCandidateVars)); + if (!VarSetOps::IsEmpty(compiler, inResolutionSet)) { if (uniquePredBlock != nullptr) { @@ -8988,7 +9403,7 @@ void LinearScan::resolveEdges() uniquePredBlock = uniquePredBlock->GetUniquePred(compiler); noway_assert(uniquePredBlock != nullptr); } - resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn); + resolveEdge(uniquePredBlock, block, ResolveSplit, inResolutionSet); } } @@ -9003,7 +9418,12 @@ void LinearScan::resolveEdges() BasicBlock* succBlock = block->GetSucc(0, compiler); if (succBlock->GetUniquePred(compiler) == nullptr) { - resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn); + VARSET_TP VARSET_INIT_NOCOPY(outResolutionSet, VarSetOps::Intersection(compiler, succBlock->bbLiveIn, + resolutionCandidateVars)); + if (!VarSetOps::IsEmpty(compiler, outResolutionSet)) + { + resolveEdge(block, succBlock, ResolveJoin, outResolutionSet); + } } } } @@ -9161,6 +9581,9 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // in resolveEdges(), after all the edge resolution has been done (by calling this // method for each edge). block = compiler->fgSplitEdge(fromBlock, toBlock); + + // Split edges are counted against fromBlock. + INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum)); break; default: unreached(); @@ -9347,11 +9770,13 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { useSwap = true; } -#else // !_TARGET_XARCH_ +#else // !_TARGET_XARCH_ + else { tempReg = tempRegInt; } + #endif // !_TARGET_XARCH_ if (useSwap || tempReg == REG_NA) { @@ -9396,6 +9821,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, sourceIntervals[sourceReg]->varNum, fromReg); location[sourceReg] = REG_NA; location[source[otherTargetReg]] = (regNumberSmall)fromReg; + + INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum)); } else { @@ -9406,6 +9833,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, // First, spill "otherInterval" from targetReg to the stack. Interval* otherInterval = sourceIntervals[source[otherTargetReg]]; + setIntervalAsSpilled(otherInterval); addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg); JITDUMP(" (%s)\n", resolveTypeName[resolveType]); location[source[otherTargetReg]] = REG_STK; @@ -9527,6 +9955,126 @@ void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask) internalCandsIndex = (unsigned char)i; } +#if TRACK_LSRA_STATS +// ---------------------------------------------------------- +// updateLsraStat: Increment LSRA stat counter. +// +// Arguments: +// stat - LSRA stat enum +// bbNum - Basic block to which LSRA stat needs to be +// associated with. +// +void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum) +{ + if (bbNum > bbNumMaxBeforeResolution) + { + // This is a newly created basic block as part of resolution. + // These blocks contain resolution moves that are already accounted. + return; + } + + switch (stat) + { + case LSRA_STAT_SPILL: + ++(blockInfo[bbNum].spillCount); + break; + + case LSRA_STAT_COPY_REG: + ++(blockInfo[bbNum].copyRegCount); + break; + + case LSRA_STAT_RESOLUTION_MOV: + ++(blockInfo[bbNum].resolutionMovCount); + break; + + case LSRA_STAT_SPLIT_EDGE: + ++(blockInfo[bbNum].splitEdgeCount); + break; + + default: + break; + } +} + +// ----------------------------------------------------------- +// dumpLsraStats - dumps Lsra stats to given file. 
+// +// Arguments: +// file - file to which stats are to be written. +// +void LinearScan::dumpLsraStats(FILE* file) +{ + unsigned sumSpillCount = 0; + unsigned sumCopyRegCount = 0; + unsigned sumResolutionMovCount = 0; + unsigned sumSplitEdgeCount = 0; + UINT64 wtdSpillCount = 0; + UINT64 wtdCopyRegCount = 0; + UINT64 wtdResolutionMovCount = 0; + + fprintf(file, "----------\n"); + fprintf(file, "LSRA Stats"); +#ifdef DEBUG + if (!VERBOSE) + { + fprintf(file, " : %s\n", compiler->info.compFullName); + } + else + { + // In verbose mode no need to print full name + // while printing lsra stats. + fprintf(file, "\n"); + } +#else + fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd)); +#endif + + fprintf(file, "----------\n"); + + for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext) + { + if (block->bbNum > bbNumMaxBeforeResolution) + { + continue; + } + + unsigned spillCount = blockInfo[block->bbNum].spillCount; + unsigned copyRegCount = blockInfo[block->bbNum].copyRegCount; + unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount; + unsigned splitEdgeCount = blockInfo[block->bbNum].splitEdgeCount; + + if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0) + { + fprintf(file, "BB%02u [%8d]: ", block->bbNum, block->bbWeight); + fprintf(file, "SpillCount = %d, ResolutionMovs = %d, SplitEdges = %d, CopyReg = %d\n", spillCount, + resolutionMovCount, splitEdgeCount, copyRegCount); + } + + sumSpillCount += spillCount; + sumCopyRegCount += copyRegCount; + sumResolutionMovCount += resolutionMovCount; + sumSplitEdgeCount += splitEdgeCount; + + wtdSpillCount += (UINT64)spillCount * block->bbWeight; + wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight; + wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight; + } + + fprintf(file, "Total Spill Count: %d Weighted: %I64u\n", sumSpillCount, wtdSpillCount); + fprintf(file, "Total CopyReg Count: %d Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount); + fprintf(file, "Total ResolutionMov Count: %d Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount); + fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount); + + // compute total number of spill temps created + unsigned numSpillTemps = 0; + for (int i = 0; i < TYP_COUNT; i++) + { + numSpillTemps += maxSpill[i]; + } + fprintf(file, "Total Number of spill temps created: %d\n\n", numSpillTemps); +} +#endif // TRACK_LSRA_STATS + #ifdef DEBUG void dumpRegMask(regMaskTP regs) { @@ -9645,6 +10193,11 @@ void RefPosition::dump() { printf(" outOfOrder"); } + + if (this->AllocateIfProfitable()) + { + printf(" regOptional"); + } printf(">\n"); } @@ -11329,9 +11882,18 @@ void LinearScan::verifyFinalAllocation() { if (VERBOSE) { + // If refPos is marked as copyReg, then the reg that is spilled + // is the homeReg of the interval not the reg currently assigned + // to refPos. + regNumber spillReg = regNum; + if (currentRefPosition->copyReg) + { + assert(interval != nullptr); + spillReg = interval->physReg; + } dumpRegRecords(); dumpEmptyRefPosition(); - printf("Spill %-4s ", getRegName(regNum)); + printf("Spill %-4s ", getRegName(spillReg)); } } else if (currentRefPosition->copyReg) @@ -11392,15 +11954,14 @@ void LinearScan::verifyFinalAllocation() interval->physReg = REG_NA; interval->assignedReg = nullptr; - // regRegcord could be null if RefPosition is to be allocated a - // reg only if profitable. 
+ // regRegcord could be null if the RefPosition does not require a register. if (regRecord != nullptr) { regRecord->assignedInterval = nullptr; } else { - assert(currentRefPosition->AllocateIfProfitable()); + assert(!currentRefPosition->RequiresRegister()); } } } @@ -11506,6 +12067,8 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum); leftInterval->physReg = rightRegNum; rightInterval->physReg = leftRegNum; + leftInterval->assignedReg = &physRegs[rightRegNum]; + rightInterval->assignedReg = &physRegs[leftRegNum]; physRegs[rightRegNum].assignedInterval = leftInterval; physRegs[leftRegNum].assignedInterval = rightInterval; if (VERBOSE) diff --git a/src/jit/lsra.h b/src/jit/lsra.h index a3c41fe1e3..c8a3fb4e24 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -73,6 +73,25 @@ struct LsraBlockInfo unsigned int predBBNum; bool hasCriticalInEdge; bool hasCriticalOutEdge; + +#if TRACK_LSRA_STATS + // Per block maintained LSRA statistics. + + // Number of spills of local vars or tree temps in this basic block. + unsigned spillCount; + + // Number of GT_COPY nodes inserted in this basic block while allocating regs. + // Note that GT_COPY nodes are also inserted as part of basic block boundary + // resolution, which are accounted against resolutionMovCount but not + // against copyRegCount. + unsigned copyRegCount; + + // Number of resolution moves inserted in this basic block. + unsigned resolutionMovCount; + + // Number of critical edges from this block that are split. + unsigned splitEdgeCount; +#endif // TRACK_LSRA_STATS }; // This is sort of a bit mask @@ -504,6 +523,8 @@ private: { return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); } + + regMaskTP getConstrainedRegMask(regMaskTP regMaskActual, regMaskTP regMaskConstrain, unsigned minRegCount); regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask); // This controls the heuristics used to select registers @@ -572,7 +593,7 @@ private: regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs); // This controls whether we always insert a GT_RELOAD instruction after a spill - // Note that this can be combined with LsraSpillAlways (or not) + // Note that this can be combined with LSRA_SPILL_ALWAYS (or not) enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400}; LsraReload getLsraReload() { @@ -769,11 +790,19 @@ private: regMaskTP getDefCandidates(GenTree* tree); var_types getDefType(GenTree* tree); - RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, regMaskTP regMask); + RefPosition* defineNewInternalTemp(GenTree* tree, + RegisterType regType, + LsraLocation currentLoc, + regMaskTP regMask DEBUGARG(unsigned minRegCandidateCount)); - int buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[]); + int buildInternalRegisterDefsForNode(GenTree* tree, + LsraLocation currentLoc, + RefPosition* defs[] DEBUGARG(unsigned minRegCandidateCount)); - void buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], int total); + void buildInternalRegisterUsesForNode(GenTree* tree, + LsraLocation currentLoc, + RefPosition* defs[], + int total DEBUGARG(unsigned minRegCandidateCount)); void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition); @@ -824,7 +853,7 @@ private: RefType 
theRefType, GenTree* theTreeNode, regMaskTP mask, - unsigned multiRegIdx = 0); + unsigned multiRegIdx = 0 DEBUGARG(unsigned minRegCandidateCount = 1)); RefPosition* newRefPosition( regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); @@ -864,6 +893,8 @@ private: unassignPhysReg(getRegisterRecord(reg), nullptr); } + void setIntervalAsSpilled(Interval* interval); + void setIntervalAsSplit(Interval* interval); void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition); void spillGCRefs(RefPosition* killRefPosition); @@ -936,11 +967,8 @@ private: char* operandString, unsigned operandStringLength); void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest); - void DumpOperandDefs(GenTree* operand, - bool& first, - LsraTupleDumpMode mode, - char* operandString, - const unsigned operandStringLength); + void DumpOperandDefs( + GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength); void TupleStyleDump(LsraTupleDumpMode mode); bool dumpTerse; @@ -1020,6 +1048,20 @@ private: void validateIntervals(); #endif // DEBUG +#if TRACK_LSRA_STATS + enum LsraStat{ + LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE, + }; + + void updateLsraStat(LsraStat stat, unsigned currentBBNum); + + void dumpLsraStats(FILE* file); + +#define INTRACK_STATS(x) x +#else // !TRACK_LSRA_STATS +#define INTRACK_STATS(x) +#endif // !TRACK_LSRA_STATS + Compiler* compiler; private: @@ -1066,6 +1108,10 @@ private: return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum); } +#if DOUBLE_ALIGN + bool doDoubleAlign; +#endif + // A map from bbNum to the block information used during register allocation. LsraBlockInfo* blockInfo; BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); @@ -1092,6 +1138,8 @@ private: unsigned int bbSeqCount; // The Location of the start of the current block. LsraLocation curBBStartLocation; + // True if the method contains any critical edges. + bool hasCriticalEdges; // Ordered list of RefPositions RefPositionList refPositions; @@ -1111,6 +1159,12 @@ private: // Current set of live tracked vars, used during building of RefPositions to determine whether // to preference to callee-save VARSET_TP currentLiveVars; + // Set of variables that may require resolution across an edge. + // This is first constructed during interval building, to contain all the lclVars that are live at BB edges. + // Then, any lclVar that is always in the same register is removed from the set. + VARSET_TP resolutionCandidateVars; + // This set contains all the lclVars that are ever spilled or split. + VARSET_TP splitOrSpilledVars; // Set of floating point variables to consider for callee-save registers. VARSET_TP fpCalleeSaveCandidateVars; #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -1382,6 +1436,7 @@ public: , delayRegFree(false) , outOfOrder(false) #ifdef DEBUG + , minRegCandidateCount(1) , rpNum(0) #endif { @@ -1555,9 +1610,15 @@ public: } #ifdef DEBUG - unsigned rpNum; // The unique RefPosition number, equal to its index in the refPositions list. Only used for - // debugging dumps. -#endif // DEBUG + // Minimum number registers that needs to be ensured while + // constraining candidates for this ref position under + // LSRA stress. + unsigned minRegCandidateCount; + + // The unique RefPosition number, equal to its index in the + // refPositions list. Only used for debugging dumps. 
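The TRACK_LSRA_STATS/INTRACK_STATS machinery declared here lets the statistics updates compile away entirely when stats are disabled, while dumpLsraStats reports both raw counts and counts scaled by block weight. A standalone sketch of the same pattern; BlockStats and the numbers are invented for the example:

#include <cstdint>
#include <cstdio>

#define TRACK_LSRA_STATS 1

#if TRACK_LSRA_STATS
#define INTRACK_STATS(x) x
#else
#define INTRACK_STATS(x)
#endif

struct BlockStats
{
    unsigned spillCount;
    unsigned copyRegCount;
    unsigned bbWeight; // block weight used for the weighted totals
};

int main()
{
    BlockStats blocks[2] = {{0, 0, 100}, {0, 0, 4}};

    // When TRACK_LSRA_STATS is 0 these statements disappear at compile time.
    INTRACK_STATS(blocks[0].spillCount += 2);
    INTRACK_STATS(blocks[1].spillCount += 1);
    INTRACK_STATS(blocks[1].copyRegCount += 1);

    unsigned totalSpills   = 0;
    uint64_t weightedSpills = 0;
    for (const BlockStats& b : blocks)
    {
        totalSpills += b.spillCount;
        weightedSpills += (uint64_t)b.spillCount * b.bbWeight;
    }
    printf("Total Spill Count: %u Weighted: %llu\n", totalSpills, (unsigned long long)weightedSpills);
    return 0;
}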
+ unsigned rpNum; +#endif // DEBUG bool isIntervalRef() { diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 00df17baa0..678bb34c54 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -204,6 +204,9 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree) { case TYP_INT: #ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this +#ifdef LEGACY_BACKEND + // the RyuJIT backend does not use the x87 FPU and therefore + // does not support folding the cast conv.i4(round.d(d)) if ((oper->gtOper == GT_INTRINSIC) && (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)) { @@ -212,7 +215,9 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree) return fgMorphTree(oper); } // if SSE2 is not enabled, we need the helper - else if (!opts.compCanUseSSE2) + else +#endif // LEGACY_BACKEND + if (!opts.compCanUseSSE2) { return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper); } @@ -360,8 +365,17 @@ GenTreePtr Compiler::fgMorphCast(GenTreePtr tree) oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG); oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED)); tree->gtFlags &= ~GTF_UNSIGNED; +#ifndef LEGACY_BACKEND + return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); +#endif } } +#ifndef LEGACY_BACKEND + else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType)) + { + return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper); + } +#endif #endif //_TARGET_XARCH_ else if (varTypeIsGC(srcType) != varTypeIsGC(dstType)) { @@ -1010,12 +1024,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall) { /* Get hold of the next argument values for the oldCall and newCall */ - assert(newArgs->IsList()); + assert(newArgs->OperIsList()); newCurr = newArgs->Current(); newArgs = newArgs->Rest(); - assert(oldArgs->IsList()); + assert(oldArgs->OperIsList()); oldCurr = oldArgs->Current(); oldArgs = oldArgs->Rest(); @@ -1047,6 +1061,8 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall) argCount = oldArgInfo->argCount; nextSlotNum = oldArgInfo->nextSlotNum; + hasRegArgs = oldArgInfo->hasRegArgs; + hasStackArgs = oldArgInfo->hasStackArgs; argsComplete = true; argsSorted = true; } @@ -1188,7 +1204,7 @@ fgArgTabEntry* fgArgInfo::RemorphRegArg( GenTreePtr argx; if (curArgTabEntry->parent != nullptr) { - assert(curArgTabEntry->parent->IsList()); + assert(curArgTabEntry->parent->OperIsList()); argx = curArgTabEntry->parent->Current(); isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0; } @@ -1255,7 +1271,7 @@ void fgArgInfo::RemorphStkArg( if (curArgTabEntry->parent != nullptr) { - assert(curArgTabEntry->parent->IsList()); + assert(curArgTabEntry->parent->OperIsList()); argx = curArgTabEntry->parent->Current(); isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0; } @@ -1283,7 +1299,7 @@ void fgArgInfo::RemorphStkArg( assert(curArgTabEntry->numSlots == numSlots); assert(curArgTabEntry->alignment == alignment); assert(curArgTabEntry->parent == parent); - assert(parent->IsList()); + assert(parent->OperIsList()); #if FEATURE_FIXED_OUT_ARGS if (curArgTabEntry->node != node) @@ -1512,7 +1528,7 @@ void fgArgInfo::ArgsComplete() #ifndef LEGACY_BACKEND #if FEATURE_MULTIREG_ARGS - // For RyuJIT backend we will expand a Multireg arg into a GT_LIST + // For RyuJIT backend we will expand a Multireg arg into a GT_FIELD_LIST // with multiple indirections, so here we consider spilling it into a tmp LclVar. 
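The fgMorphCast hunks above route TYP_LONG-to-floating casts on the non-LEGACY_BACKEND x86 path to the CORINFO_HELP_LNG2DBL helper, because 32-bit SSE2 has no single signed-int64-to-double instruction. A portable sketch of the exact-rounding semantics such a helper must provide; this is illustrative only, not CoreCLR's hand-tuned implementation, and it assumes two's-complement halves:

#include <cstdint>
#include <cstdio>

double Lng2Dbl(int64_t value)
{
    // Split into halves that 32-bit SSE2 can convert individually:
    // a signed high half and an unsigned low half.
    uint64_t bits = static_cast<uint64_t>(value);
    int32_t  hi   = static_cast<int32_t>(bits >> 32);
    uint32_t lo   = static_cast<uint32_t>(bits);

    // hi * 2^32 is exact in double, and the single final addition
    // rounds once, giving the correctly rounded result.
    return static_cast<double>(hi) * 4294967296.0 + static_cast<double>(lo);
}

int main()
{
    printf("%.1f %.1f\n", Lng2Dbl(-1), Lng2Dbl(INT64_C(123456789012345)));
    return 0;
}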
// // Note that Arm32 is a LEGACY_BACKEND and it defines FEATURE_MULTIREG_ARGS @@ -2364,7 +2380,7 @@ void fgArgInfo::EvalArgsToTemps() { GenTreePtr parent = curArgTabEntry->parent; /* a normal argument from the list */ - noway_assert(parent->IsList()); + noway_assert(parent->OperIsList()); noway_assert(parent->gtOp.gtOp1 == argx); parent->gtOp.gtOp1 = setupArg; @@ -2387,7 +2403,7 @@ void fgArgInfo::EvalArgsToTemps() } else { - noway_assert(tmpRegArgNext->IsList()); + noway_assert(tmpRegArgNext->OperIsList()); noway_assert(tmpRegArgNext->Current()); tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg); tmpRegArgNext = tmpRegArgNext->Rest(); @@ -2603,7 +2619,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) unsigned argSlots = 0; unsigned nonRegPassedStructSlots = 0; - bool lateArgsComputed = (call->gtCallLateArgs != nullptr); + bool reMorphing = call->AreArgsComplete(); bool callHasRetBuffArg = call->HasRetBufArg(); #ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_ @@ -2731,7 +2747,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // Process the late arguments (which were determined by a previous caller). // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs) // may need to refer to it. - if (lateArgsComputed) + if (reMorphing) { // We need to reMorph the gtCallLateArgs early since that is what triggers // the expression folding and we need to have the final folded gtCallLateArgs @@ -2745,14 +2761,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // // Since the late arguments are evaluated last we have pushed all of the // other arguments on the stack before we evaluate these late arguments, - // so we record the stack depth on the first morph call when lateArgsComputed + // so we record the stack depth on the first morph call when reMorphing // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel) // unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel(); - fgPtrArgCntCur += callStkLevel; - call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList(); - flagsSummary |= call->gtCallLateArgs->gtFlags; - fgPtrArgCntCur -= callStkLevel; + if (call->gtCallLateArgs != nullptr) + { + fgPtrArgCntCur += callStkLevel; + call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList(); + flagsSummary |= call->gtCallLateArgs->gtFlags; + fgPtrArgCntCur -= callStkLevel; + } assert(call->fgArgInfo != nullptr); call->fgArgInfo->RemorphReset(); @@ -2780,7 +2799,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // *********** END NOTE ********* CLANG_FORMAT_COMMENT_ANCHOR; -#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) +#if !defined(LEGACY_BACKEND) +#if defined(_TARGET_X86_) // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers // correctly here. if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME)) @@ -2792,21 +2812,20 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the // hi part to be in EDX. This sets the argument registers up correctly. 
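The comment that closes the hunk above describes the custom convention for the x86 long-shift helpers: the lo half of the operand travels in EAX and the hi half in EDX. The value-level contract of a CORINFO_HELP_LLSH-style helper is simply a 64-bit left shift of the recombined halves; a hypothetical portable rendering (the real helper is target assembly):

#include <cstdint>
#include <cstdio>

uint64_t LLsh(uint32_t lo, uint32_t hi, unsigned shiftBy)
{
    uint64_t value = (static_cast<uint64_t>(hi) << 32) | lo;
    return value << (shiftBy & 63); // x86 shift counts are masked
}

int main()
{
    // 0x0000000100000000 << 4 == 0x0000001000000000
    printf("%llx\n", static_cast<unsigned long long>(LLsh(0, 1, 4)));
    return 0;
}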
- else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || call->IsHelperCall(this, CORINFO_HELP_LRSZ)) + else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) || + call->IsHelperCall(this, CORINFO_HELP_LRSZ)) { GenTreeArgList* args = call->gtCallArgs; - GenTree* arg1 = args->Current(); + GenTree* arg1 = args->Current(); assert(arg1 != nullptr); nonStandardArgs.Add(arg1, REG_LNGARG_LO); - args = args->Rest(); + args = args->Rest(); GenTree* arg2 = args->Current(); assert(arg2 != nullptr); nonStandardArgs.Add(arg2, REG_LNGARG_HI); } -#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_) - -#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_) +#else // !defined(_TARGET_X86_) // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed. // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling // convention for x86/SSE. @@ -2817,7 +2836,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { args = call->gtCallArgs; assert(args != nullptr); - assert(args->IsList()); + assert(args->OperIsList()); argx = call->gtCallArgs->Current(); @@ -2871,21 +2890,32 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM); } - else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie) + else +#endif // defined(_TARGET_X86_) + if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr)) { assert(!call->IsUnmanaged()); - // put cookie into R11 GenTree* arg = call->gtCallCookie; noway_assert(arg != nullptr); call->gtCallCookie = nullptr; +#if defined(_TARGET_X86_) + // x86 passes the cookie on the stack as the final argument to the call. + GenTreeArgList** insertionPoint = &call->gtCallArgs; + for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->Rest()) + { + } + *insertionPoint = gtNewListNode(arg, nullptr); +#else // !defined(_TARGET_X86_) + // All other architectures pass the cookie in a register. 
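The x86 cookie path above appends the cookie as the final stack argument by walking a GenTreeArgList** until it reaches the null Rest slot and writing through it. That pointer-to-pointer append idiom, isolated on a hypothetical singly linked list:

#include <cstdio>

struct Node
{
    int   value;
    Node* next;
};

void AppendLast(Node** head, Node* node)
{
    // Walk the chain of 'next' slots; writing through the null slot
    // handles the empty-list case without a special branch.
    Node** insertionPoint = head;
    for (; *insertionPoint != nullptr; insertionPoint = &(*insertionPoint)->next)
    {
    }
    *insertionPoint = node;
}

int main()
{
    Node  a{1, nullptr};
    Node  b{2, nullptr};
    Node* head = nullptr;
    AppendLast(&head, &a);
    AppendLast(&head, &b);
    for (Node* n = head; n != nullptr; n = n->next)
    {
        printf("%d\n", n->value);
    }
    return 0;
}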
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); - numArgs++; +#endif // defined(_TARGET_X86_) nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM); + numArgs++; - // put destination into R10 + // put destination into R10/EAX arg = gtClone(call->gtCallAddr, true); call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs); numArgs++; @@ -2896,7 +2926,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) call->gtCallType = CT_HELPER; call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI); } -#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_) +#endif // !defined(LEGACY_BACKEND) // Allocate the fgArgInfo for the call node; // @@ -2929,7 +2959,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) /* We must fill in or update the argInfo table */ - if (lateArgsComputed) + if (reMorphing) { /* this is a register argument - possibly update it in the table */ call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1); @@ -3075,7 +3105,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) *parentArgx = argx; flagsSummary |= argx->gtFlags; - assert(args->IsList()); + assert(args->OperIsList()); assert(argx == args->Current()); #ifndef LEGACY_BACKEND @@ -3114,13 +3144,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) compFloatingPointUsed = true; } - unsigned size = 0; - CORINFO_CLASS_HANDLE copyBlkClass = nullptr; - bool isRegArg = false; + unsigned size = 0; + CORINFO_CLASS_HANDLE copyBlkClass = nullptr; + bool isRegArg = false; + bool isNonStandard = false; + regNumber nonStdRegNum = REG_NA; fgArgTabEntryPtr argEntry = nullptr; - if (lateArgsComputed) + if (reMorphing) { argEntry = gtArgEntryByArgNum(call, argIndex); } @@ -3128,7 +3160,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #ifdef _TARGET_ARM_ bool passUsingIntRegs; - if (lateArgsComputed) + if (reMorphing) { passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); passUsingIntRegs = isValidIntArgReg(argEntry->regNum); @@ -3179,7 +3211,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #elif defined(_TARGET_ARM64_) - if (lateArgsComputed) + if (reMorphing) { passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); } @@ -3189,8 +3221,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } #elif defined(_TARGET_AMD64_) -#if defined(UNIX_AMD64_ABI) - if (lateArgsComputed) + if (reMorphing) { passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum); } @@ -3198,9 +3229,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) { passUsingFloatRegs = varTypeIsFloating(argx); } -#else // WINDOWS_AMD64_ABI - passUsingFloatRegs = varTypeIsFloating(argx); -#endif // !UNIX_AMD64_ABI #elif defined(_TARGET_X86_) passUsingFloatRegs = false; @@ -3216,7 +3244,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) bool isStructArg = varTypeIsStruct(argx); - if (lateArgsComputed) + if (reMorphing) { #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) // Get the struct description for the already completed struct argument. @@ -3260,7 +3288,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // This size has now been computed assert(size != 0); } - else // !lateArgsComputed + else // !reMorphing { // // Figure out the size of the argument. 
This is either in number of registers, or number of @@ -3287,7 +3315,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } } #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING - size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' + size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot' #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #elif defined(_TARGET_ARM64_) if (isStructArg) @@ -3379,7 +3407,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) GenTreePtr argObj = argx; GenTreePtr* parentOfArgObj = parentArgx; - assert(args->IsList()); + assert(args->OperIsList()); assert(argx == args->Current()); /* The GT_OBJ may be a child of a GT_COMMA */ @@ -3686,11 +3714,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // the obj reading memory past the end of the valuetype CLANG_FORMAT_COMMENT_ANCHOR; -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction - copyBlkClass = objClass; - size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items -#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) if (roundupSize > originalSize) { copyBlkClass = objClass; @@ -3705,7 +3728,6 @@ } size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items -#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND) } } } @@ -3841,7 +3863,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } } #else // !defined(UNIX_AMD64_ABI) - isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; + isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs; #endif // !defined(UNIX_AMD64_ABI) #endif // _TARGET_ARM_ } @@ -3850,8 +3872,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) isRegArg = false; } -#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - if (call->IsTailCallViaHelper()) +#ifndef LEGACY_BACKEND + // If there are nonstandard args (outside the calling convention) they were inserted above + // and noted in a table so we can recognize them here and build their argInfo. + // + // They should not affect the placement of any other args or stack space required. + // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. + isNonStandard = nonStandardArgs.FindReg(argx, &nonStdRegNum); + if (isNonStandard && (nonStdRegNum == REG_STK)) + { + isRegArg = false; + } +#if defined(_TARGET_X86_) + else if (call->IsTailCallViaHelper()) { // We have already (before calling fgMorphArgs()) appended the 4 special args // required by the x86 tailcall helper.
These args are required to go on the @@ -3862,9 +3895,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) isRegArg = false; } } -#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) - - } // end !lateArgsComputed +#endif // defined(_TARGET_X86_) +#endif // !LEGACY_BACKEND + } // end !reMorphing // // Now we know if the argument goes in registers or not and how big it is, @@ -3943,23 +3976,17 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #endif fgArgTabEntryPtr newArgEntry; - if (lateArgsComputed) + if (reMorphing) { // This is a register argument - possibly update it in the table newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign); } else { - bool isNonStandard = false; - -#ifndef LEGACY_BACKEND - // If there are nonstandard args (outside the calling convention) they were inserted above - // and noted them in a table so we can recognize them here and build their argInfo. - // - // They should not affect the placement of any other args or stack space required. - // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls. - isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum); -#endif // !LEGACY_BACKEND + if (isNonStandard) + { + nextRegNum = nonStdRegNum; + } // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign @@ -4053,7 +4080,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // If the register arguments have not been determined then we must fill in the argInfo - if (lateArgsComputed) + if (reMorphing) { // This is a stack argument - possibly update it in the table call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign); @@ -4068,14 +4095,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) if (copyBlkClass != NO_CLASS_HANDLE) { - noway_assert(!lateArgsComputed); + noway_assert(!reMorphing); fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc)); // This can cause a GTF_EXCEPT flag to be set. // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags. // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and - // there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying + // there are no register arguments. Then reMorphing is never true, so we keep re-copying // any struct arguments. // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0) flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT); @@ -4088,10 +4115,21 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) #ifndef LEGACY_BACKEND if (argx->gtOper == GT_MKREFANY) { - NYI_X86("MKREFANY"); - // 'Lower' the MKREFANY tree and insert it. 
- noway_assert(!lateArgsComputed); + noway_assert(!reMorphing); + +#ifdef _TARGET_X86_ + + // Build the mkrefany as a GT_FIELD_LIST + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) + GenTreeFieldList(argx->gtOp.gtOp1, offsetof(CORINFO_RefAny, dataPtr), TYP_BYREF, nullptr); + (void)new (this, GT_FIELD_LIST) + GenTreeFieldList(argx->gtOp.gtOp2, offsetof(CORINFO_RefAny, type), TYP_I_IMPL, fieldList); + fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx); + fp->node = fieldList; + args->gtOp.gtOp1 = fieldList; + +#else // !_TARGET_X86_ // Get a new temp // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany @@ -4117,9 +4155,47 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // EvalArgsToTemps will cause tmp to actually get loaded as the argument call->fgArgInfo->EvalToTmp(argIndex, tmp, asg); lvaSetVarAddrExposed(tmp); +#endif // !_TARGET_X86_ } #endif // !LEGACY_BACKEND +#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND) + if (isStructArg) + { + GenTree* lclNode = fgIsIndirOfAddrOfLocal(argx); + if ((lclNode != nullptr) && + (lvaGetPromotionType(lclNode->AsLclVarCommon()->gtLclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT)) + { + // Make a GT_FIELD_LIST of the field lclVars. + GenTreeLclVarCommon* lcl = lclNode->AsLclVarCommon(); + LclVarDsc* varDsc = &(lvaTable[lcl->gtLclNum]); + GenTreeFieldList* fieldList = nullptr; + for (unsigned fieldLclNum = varDsc->lvFieldLclStart; + fieldLclNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldLclNum) + { + LclVarDsc* fieldVarDsc = &lvaTable[fieldLclNum]; + if (fieldList == nullptr) + { + lcl->SetLclNum(fieldLclNum); + lcl->ChangeOper(GT_LCL_VAR); + lcl->gtType = fieldVarDsc->lvType; + fieldList = new (this, GT_FIELD_LIST) + GenTreeFieldList(lcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, nullptr); + fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx); + fp->node = fieldList; + args->gtOp.gtOp1 = fieldList; + } + else + { + GenTree* fieldLcl = gtNewLclvNode(fieldLclNum, fieldVarDsc->lvType); + fieldList = new (this, GT_FIELD_LIST) + GenTreeFieldList(fieldLcl, fieldVarDsc->lvFldOffset, fieldVarDsc->lvType, fieldList); + } + } + } + } +#endif // defined (_TARGET_X86_) && !defined(LEGACY_BACKEND) + #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (isStructArg && !isRegArg) { @@ -4132,7 +4208,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) } } // end foreach argument loop - if (!lateArgsComputed) + if (!reMorphing) { call->fgArgInfo->ArgsComplete(); #ifdef LEGACY_BACKEND @@ -4240,11 +4316,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy // is added to make sure to call EvalArgsToTemp. - if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs() + if (!reMorphing && (call->fgArgInfo->HasRegArgs() #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING - || hasStackArgCopy + || hasStackArgCopy #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING - )) + )) { // This is the first time that we morph this call AND it has register arguments. // Follow into the code below and do the 'defer or eval to temp' analysis. 
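The x86 mkrefany expansion above relies only on CORINFO_RefAny being a TypedReference-like pair of pointer-sized fields addressed with offsetof. 'RefAny' below is a hypothetical mirror of that layout, showing the two offsets the GT_FIELD_LIST entries carry:

#include <cstddef>
#include <cstdio>

struct RefAny
{
    void* dataPtr; // byref to the referenced value (the TYP_BYREF entry)
    void* type;    // the type handle (the TYP_I_IMPL entry)
};

int main()
{
    printf("dataPtr at %zu, type at %zu\n",
           offsetof(RefAny, dataPtr), offsetof(RefAny, type));
    return 0;
}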
@@ -4271,7 +4347,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode) // In the future we can migrate UNIX_AMD64 to use this // method instead of fgMorphSystemVStructArgs - // We only build GT_LISTs for MultiReg structs for the RyuJIT backend + // We only build GT_FIELD_LISTs for MultiReg structs for the RyuJIT backend if (hasMultiregStructArgs) { fgMorphMultiregStructArgs(call); @@ -4334,7 +4410,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen { for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); if (argx == argNode) @@ -4355,7 +4431,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen { var_types originalType = type; // If we have already processed the arg... - if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg)) + if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg)) { continue; } @@ -4386,6 +4462,16 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen // Create LCL_FLD for each eightbyte. argListCreated = true; + // First eightbyte. + arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); + arg->gtType = + GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], + fgEntryPtr->structDesc.eightByteSizes[0]); + GenTreeFieldList* fieldList = + new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr); + fieldList->gtType = originalType; // Preserve the type. It is a special case. + arg = fieldList; + // Second eightbyte. GenTreeLclFld* newLclField = new (this, GT_LCL_FLD) GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc @@ -4393,17 +4479,9 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen fgEntryPtr->structDesc.eightByteSizes[1]), lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]); - GenTreeArgList* aggregate = gtNewAggregate(newLclField); - aggregate->gtType = originalType; // Preserve the type. It is a special case. - newLclField->gtFieldSeq = FieldSeqStore::NotAField(); - - // First field - arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField(); - arg->gtType = - GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0], - fgEntryPtr->structDesc.eightByteSizes[0]); - arg = aggregate->Prepend(this, arg); - arg->gtType = type; // Preserve the type. It is a special case. + fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList); + fieldList->gtType = originalType; // Preserve the type. It is a special case. + newLclField->gtFieldSeq = FieldSeqStore::NotAField(); } else { @@ -4450,7 +4528,7 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen { for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); if (argx == argNode) @@ -4490,8 +4568,8 @@ void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgumen // // Notes: // We only call fgMorphMultiregStructArg for the register passed TYP_STRUCT arguments. -// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form -// whicj is only used for register arguments. +// The call to fgMorphMultiregStructArg will mutate the argument into the GT_FIELD_LIST form +// which is only used for struct arguments. 
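fgMorphSystemVStructArgs, patched above, models a struct argument as up to two independently classified "eightbytes", one GT_FIELD_LIST entry per eightbyte. Under the SysV AMD64 ABI the two halves of a 16-byte struct can land in different register classes; assuming a conforming compiler, the example below passes one half in an SSE register and the other in an integer register:

#include <cstdio>

struct Mixed
{
    double d; // eightbyte 0: SSE class     -> an xmm register
    long   l; // eightbyte 1: INTEGER class -> a general-purpose register
};

double Sum(Mixed m) // m arrives split across two register classes
{
    return m.d + static_cast<double>(m.l);
}

int main()
{
    printf("%.1f\n", Sum({1.5, 2}));
    return 0;
}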
// If this method fails to find any TYP_STRUCT arguments it will assert. // void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) @@ -4540,7 +4618,7 @@ { for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { - assert(list->IsList()); + assert(list->OperIsList()); GenTreePtr argNode = list->Current(); if (argx == argNode) @@ -4588,7 +4666,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) //----------------------------------------------------------------------------- // fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list -// Morph the argument into a set of GT_LIST nodes. +// Morph the argument into a set of GT_FIELD_LIST nodes. // // Arguments: // arg - A GenTree node containing a TYP_STRUCT arg that @@ -4600,7 +4678,7 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) // for passing in multiple registers. // If arg is a LclVar we check if it is struct promoted and has the right number of fields // and if they are at the appropriate offsets we will use the struct promoted fields -// in the GT_LIST nodes that we create. +// in the GT_FIELD_LIST nodes that we create. // If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements // we will use a set of GT_LCL_FLD nodes to access the various portions of the struct // this also forces the struct to be stack allocated into the local frame. @@ -4715,7 +4793,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f // We should still have a TYP_STRUCT assert(argValue->TypeGet() == TYP_STRUCT); - GenTreeArgList* newArg = nullptr; + GenTreeFieldList* newArg = nullptr; // Are we passing a struct LclVar? // @@ -4817,9 +4895,10 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f // Create a new tree for 'arg' // replace the existing LDOBJ(ADDR(LCLVAR)) - // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr)) + // with a FIELD_LIST(LCLVAR-LO, FIELD_LIST(LCLVAR-HI, nullptr)) // - newArg = gtNewAggregate(hiLclVar)->Prepend(this, loLclVar); + newArg = new (this, GT_FIELD_LIST) GenTreeFieldList(loLclVar, 0, loType, nullptr); + (void)new (this, GT_FIELD_LIST) GenTreeFieldList(hiLclVar, TARGET_POINTER_SIZE, hiType, newArg); } } } @@ -4885,27 +4964,22 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f // lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField)); - // Start building our list from the last element - unsigned offset = lastOffset; - unsigned inx = elemCount; - // Create a new tree for 'arg' // replace the existing LDOBJ(ADDR(LCLVAR)) - // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...) + // with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...) // - while (inx > 0) + unsigned offset = 0; + GenTreeFieldList* listEntry = nullptr; + for (unsigned inx = 0; inx < elemCount; inx++) { - inx--; - offset -= elemSize; + elemSize = genTypeSize(type[inx]); GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset); + listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(nextLclFld, offset, type[inx], listEntry); if (newArg == nullptr) { - newArg = gtNewAggregate(nextLclFld); - } - else - { - newArg = newArg->Prepend(this, nextLclFld); + newArg = listEntry; } + offset += elemSize; } } // Are we passing a GT_OBJ struct?
@@ -4918,17 +4992,14 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f // Create a new tree for 'arg' // replace the existing LDOBJ(EXPR) - // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...) + // with a FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), nullptr) ...) // - // Start building our list from the last element - unsigned offset = structSize; - unsigned inx = elemCount; - while (inx > 0) + unsigned offset = 0; + GenTreeFieldList* listEntry = nullptr; + for (unsigned inx = 0; inx < elemCount; inx++) { - inx--; - elemSize = genTypeSize(type[inx]); - offset -= elemSize; + elemSize = genTypeSize(type[inx]); GenTreePtr curAddr = baseAddr; if (offset != 0) { @@ -4941,14 +5012,21 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f curAddr = baseAddr; } GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr); - if (newArg == nullptr) + + // For safety all GT_IND should have at least GT_GLOB_REF set. + curItem->gtFlags |= GTF_GLOB_REF; + if (fgAddrCouldBeNull(curItem)) { - newArg = gtNewAggregate(curItem); + // This indirection can cause a GPF if the address could be null. + curItem->gtFlags |= GTF_EXCEPT; } - else + + listEntry = new (this, GT_FIELD_LIST) GenTreeFieldList(curItem, offset, type[inx], listEntry); + if (newArg == nullptr) { - newArg = newArg->Prepend(this, curItem); + newArg = listEntry; } + offset += elemSize; } } } @@ -5674,7 +5752,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns); #if SMALL_TREE_NODES - assert(tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE); + assert((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL); #endif // Change the original GT_INDEX node into a GT_IND node @@ -5847,7 +5925,15 @@ GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varTyp lclOffs)); // Access the argument through the local - GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg); + GenTreePtr tree; + if (varType == TYP_STRUCT) + { + tree = gtNewBlockVal(ptrArg, varDsc->lvExactSize); + } + else + { + tree = gtNewOperNode(GT_IND, varType, ptrArg); + } tree->gtFlags |= GTF_IND_TGTANYWHERE; if (varDsc->lvAddrExposed) @@ -5884,8 +5970,14 @@ GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree) if (info.compIsVarArgs) { GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0); - if (newTree != NULL) + if (newTree != nullptr) + { + if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) + { + fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); + } return newTree; + } } #endif // _TARGET_X86_ @@ -6205,7 +6297,9 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac) GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL); if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE) + { baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset); + } addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset); @@ -6483,8 +6577,8 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult) // hanging a "nothing" node to it. Later the "nothing" node will be removed // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
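The rewritten loop above that produces FIELD_LIST(IND(EXPR), FIELD_LIST(IND(EXPR+8), ...)) denotes nothing more exotic than reading the struct as pointer-sized chunks at increasing offsets, front to back (the old code built the same list back to front). A stand-alone sketch of that access pattern for a 16-byte struct:

#include <cstdint>
#include <cstdio>
#include <cstring>

void LoadAsTwoChunks(const void* baseAddr, uint64_t out[2])
{
    const uint8_t* p = static_cast<const uint8_t*>(baseAddr);
    std::memcpy(&out[0], p, 8);     // IND(EXPR),   offset 0
    std::memcpy(&out[1], p + 8, 8); // IND(EXPR+8), offset 8
}

int main()
{
    uint64_t src[2] = {0x1111111111111111u, 0x2222222222222222u};
    uint64_t dst[2];
    LoadAsTwoChunks(src, dst);
    printf("%llx %llx\n", static_cast<unsigned long long>(dst[0]),
           static_cast<unsigned long long>(dst[1]));
    return 0;
}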
- noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call); - fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode(); + noway_assert(fgMorphStmt->gtStmtExpr == call); + fgMorphStmt->gtStmtExpr = gtNewNothingNode(); } // Clear the Inline Candidate flag so we can ensure later we tried @@ -6662,7 +6756,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) { nCalleeArgs++; - assert(args->IsList()); + assert(args->OperIsList()); GenTreePtr argx = args->gtOp.gtOp1; if (varTypeIsStruct(argx)) @@ -6980,7 +7074,14 @@ void Compiler::fgMorphTailCall(GenTreeCall* call) } #endif // _TARGET_X86_ +#if defined(_TARGET_X86_) + // When targeting x86, the runtime requires that we perform a null check on the `this` argument before tail + // calling to a virtual dispatch stub. This requirement is a consequence of limitations in the runtime's + // ability to map an AV to a NullReferenceException if the AV occurs in a dispatch stub. + if (call->NeedsNullCheck() || call->IsVirtualStub()) +#else if (call->NeedsNullCheck()) +#endif // defined(_TARGET_X86_) { // clone "this" if "this" has no side effects. if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT)) @@ -7668,17 +7769,39 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) } #endif - GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr; + GenTreePtr stmtExpr = fgMorphStmt->gtStmtExpr; #ifdef DEBUG // Tail call needs to be in one of the following IR forms // Either a call stmt or - // GT_RETURN(GT_CALL(..)) or - // var = call - noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) || - (stmtExpr->gtOper == GT_RETURN && - (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) || - (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call)); + // GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..))) + // var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..))) + genTreeOps stmtOper = stmtExpr->gtOper; + if (stmtOper == GT_CALL) + { + noway_assert(stmtExpr == call); + } + else + { + noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG); + GenTreePtr treeWithCall; + if (stmtOper == GT_RETURN) + { + treeWithCall = stmtExpr->gtGetOp1(); + } + else + { + treeWithCall = stmtExpr->gtGetOp2(); + } + if (treeWithCall->gtOper == GT_CAST) + { + noway_assert(treeWithCall->gtGetOp1() == call && !treeWithCall->gtOverflow()); + } + else + { + noway_assert(treeWithCall == call); + } + } #endif // For void calls, we would have created a GT_CALL in the stmt list. @@ -7687,7 +7810,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) // For debuggable code, it would be an assignment of the call to a temp // We want to get rid of any of these extra trees, and just leave // the call. - GenTreePtr nextMorphStmt = fgMorphStmt->gtNext; + GenTreeStmt* nextMorphStmt = fgMorphStmt->gtNextStmt; #ifdef _TARGET_AMD64_ // Legacy Jit64 Compat: @@ -7703,46 +7826,46 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG)) { // First delete all GT_NOPs after the call - GenTreePtr morphStmtToRemove = nullptr; + GenTreeStmt* morphStmtToRemove = nullptr; while (nextMorphStmt != nullptr) { - GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr; + GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr; if (!nextStmtExpr->IsNothingNode()) { break; } morphStmtToRemove = nextMorphStmt; - nextMorphStmt = morphStmtToRemove->gtNext; + nextMorphStmt = morphStmtToRemove->gtNextStmt; fgRemoveStmt(compCurBB, morphStmtToRemove); } // Check to see if there is a pop.
// Since tail call is honored, we can get rid of the stmt corresponding to pop. - if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN) + if (nextMorphStmt != nullptr && nextMorphStmt->gtStmtExpr->gtOper != GT_RETURN) { // Note that pop opcode may or may not result in a new stmt (for details see // impImportBlockCode()). Hence, it is not possible to assert about the IR // form generated by pop but pop tree must be side-effect free so that we can // delete it safely. - GenTreePtr popStmt = nextMorphStmt; - nextMorphStmt = nextMorphStmt->gtNext; + GenTreeStmt* popStmt = nextMorphStmt; + nextMorphStmt = nextMorphStmt->gtNextStmt; - noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0); + noway_assert((popStmt->gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0); fgRemoveStmt(compCurBB, popStmt); } // Next delete any GT_NOP nodes after pop while (nextMorphStmt != nullptr) { - GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr; + GenTreePtr nextStmtExpr = nextMorphStmt->gtStmtExpr; if (!nextStmtExpr->IsNothingNode()) { break; } morphStmtToRemove = nextMorphStmt; - nextMorphStmt = morphStmtToRemove->gtNext; + nextMorphStmt = morphStmtToRemove->gtNextStmt; fgRemoveStmt(compCurBB, morphStmtToRemove); } } @@ -7751,7 +7874,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) // Delete GT_RETURN if any if (nextMorphStmt != nullptr) { - GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr; + GenTreePtr retExpr = nextMorphStmt->gtStmtExpr; noway_assert(retExpr->gtOper == GT_RETURN); // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var). @@ -7766,7 +7889,7 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call) fgRemoveStmt(compCurBB, nextMorphStmt); } - fgMorphStmt->gtStmt.gtStmtExpr = call; + fgMorphStmt->gtStmtExpr = call; // Tail call via helper: The VM can't use return address hijacking if we're // not going to return and the helper doesn't have enough info to safely poll, @@ -7855,7 +7978,7 @@ NO_TAIL_CALL: || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR) #endif ) && - (call == fgMorphStmt->gtStmt.gtStmtExpr)) + (call == fgMorphStmt->gtStmtExpr)) { // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result. // Transform it into a null check. @@ -8008,31 +8131,72 @@ NO_TAIL_CALL: // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place. if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST))) { - GenTreePtr value = gtArgEntryByArgNum(call, 2)->node; - + GenTree* value = gtArgEntryByArgNum(call, 2)->node; if (value->IsIntegralConst(0)) { assert(value->OperGet() == GT_CNS_INT); - GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node; - GenTreePtr index = gtArgEntryByArgNum(call, 1)->node; - arr = gtClone(arr, true); - if (arr != nullptr) + GenTree* arr = gtArgEntryByArgNum(call, 0)->node; + GenTree* index = gtArgEntryByArgNum(call, 1)->node; + + // Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy + // the spill trees as well if necessary. 
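The rewrite above is justified by the semantics of covariant array stores: CORINFO_HELP_ARRADDR_ST exists to type-check the element being stored into a ref array, and the constant null passes any such check, so only the null check, bounds check, and store need to remain. A hedged C++ analogue of the check being skipped; the types below are hypothetical stand-ins for the runtime's, not its actual helper:

#include <cassert>

struct Object;

struct RefArray
{
    int      length;
    Object** elements;
    // element type handle elided
};

void ArrAddrSt(RefArray* arr, int index, Object* value)
{
    assert((index >= 0) && (index < arr->length)); // bounds check stays
    if (value != nullptr)
    {
        // TypeCheck(arr, value) would go here: it is needed only for
        // non-null stores, which is exactly why the JIT can drop the
        // helper call when the value is a constant null.
    }
    arr->elements[index] = value;
}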
+ GenTreeOp* argSetup = nullptr; + for (GenTreeArgList* earlyArgs = call->gtCallArgs; earlyArgs != nullptr; earlyArgs = earlyArgs->Rest()) { - index = gtClone(index, true); - if (index != nullptr) + GenTree* const arg = earlyArgs->Current(); + if (arg->OperGet() != GT_ASG) { - value = gtClone(value); - noway_assert(value != nullptr); + continue; + } + + assert(arg != arr); + assert(arg != index); - GenTreePtr nullCheckedArr = impCheckForNullPointer(arr); - GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); - GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value); - arrStore->gtFlags |= GTF_ASG; + arg->gtFlags &= ~GTF_LATE_ARG; - return fgMorphTree(arrStore); + GenTree* op1 = argSetup; + if (op1 == nullptr) + { + op1 = gtNewNothingNode(); +#if DEBUG + op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG } + + argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, op1, arg); + +#if DEBUG + argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG } + +#ifdef DEBUG + auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult { + (*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; + return WALK_CONTINUE; + }; + + fgWalkTreePost(&arr, resetMorphedFlag); + fgWalkTreePost(&index, resetMorphedFlag); + fgWalkTreePost(&value, resetMorphedFlag); +#endif // DEBUG + + GenTree* const nullCheckedArr = impCheckForNullPointer(arr); + GenTree* const arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index); + GenTree* const arrStore = gtNewAssignNode(arrIndexNode, value); + arrStore->gtFlags |= GTF_ASG; + + GenTree* result = fgMorphTree(arrStore); + if (argSetup != nullptr) + { + result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result); +#if DEBUG + result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; +#endif // DEBUG + } + + return result; } } @@ -8187,8 +8351,14 @@ GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree) { GenTreePtr newTree = fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs); - if (newTree != NULL) + if (newTree != nullptr) + { + if (newTree->OperIsBlk() && ((tree->gtFlags & GTF_VAR_DEF) == 0)) + { + fgMorphBlkToInd(newTree->AsBlk(), newTree->gtType); + } return newTree; + } } } #endif // _TARGET_X86_ @@ -8390,7 +8560,7 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree) // with the bits to create a single assignment. noway_assert(size <= REGSIZE_BYTES); - if (isInitBlock && (src->gtOper != GT_CNS_INT)) + if (isInitBlock && !src->IsConstInitVal()) { return nullptr; } @@ -8563,8 +8733,12 @@ } else #endif - if (src->IsCnsIntOrI()) { + if (src->OperIsInitVal()) + { + src = src->gtGetOp1(); + } + assert(src->IsCnsIntOrI()); // This will mutate the integer constant, in place, to be the correct // value for the type we are using in the assignment. src->AsIntCon()->FixupInitBlkValue(asgType); @@ -8632,7 +8806,8 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) { - noway_assert(tree->gtOper == GT_ASG && varTypeIsStruct(tree)); + // We must have the GT_ASG form of InitBlkOp.
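FixupInitBlkValue, invoked in the hunk above, widens the single init byte to the width of the scalar type chosen for the assignment, so an initblk can become one ordinary store. A minimal re-implementation of that byte-broadcast, assuming replication of the low byte is all the fixup needs for integral types:

#include <cstdint>
#include <cstdio>

uint64_t BroadcastInitByte(uint8_t fill, unsigned sizeInBytes)
{
    // 0x01 in every byte lane; multiplying copies 'fill' into each lane.
    uint64_t pattern = 0x0101010101010101ull;
    if (sizeInBytes < 8)
    {
        pattern >>= 8 * (8 - sizeInBytes);
    }
    return fill * pattern;
}

int main()
{
    printf("%llx\n", static_cast<unsigned long long>(BroadcastInitByte(0x42, 8)));
    // prints 4242424242424242
    return 0;
}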
+ noway_assert((tree->OperGet() == GT_ASG) && tree->OperIsInitBlkOp()); #ifdef DEBUG bool morphed = false; #endif // DEBUG @@ -8647,6 +8822,12 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) tree->gtOp.gtOp1 = dest; } tree->gtType = dest->TypeGet(); + // (Constant propagation may cause a TYP_STRUCT lclVar to be changed to GT_CNS_INT, and its + // type will be the type of the original lclVar, in which case we will change it to TYP_INT). + if ((src->OperGet() == GT_CNS_INT) && varTypeIsStruct(src)) + { + src->gtType = TYP_INT; + } JITDUMP("\nfgMorphInitBlock:"); GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree); @@ -8658,7 +8839,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) else { GenTree* destAddr = nullptr; - GenTree* initVal = src; + GenTree* initVal = src->OperIsInitVal() ? src->gtGetOp1() : src; GenTree* blockSize = nullptr; unsigned blockWidth = 0; FieldSeqNode* destFldSeq = nullptr; @@ -8727,6 +8908,7 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) if (destLclVar->lvPromoted && blockWidthIsConst) { + assert(initVal->OperGet() == GT_CNS_INT); noway_assert(varTypeIsStruct(destLclVar)); noway_assert(!opts.MinOpts()); if (destLclVar->lvAddrExposed & destLclVar->lvContainsHoles) @@ -8786,25 +8968,9 @@ GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree) #if CPU_USES_BLOCK_MOVE compBlkOpUsed = true; #endif - if (!dest->OperIsBlk()) - { - GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(dest); - if (clsHnd == NO_CLASS_HANDLE) - { - dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth); - } - else - { - GenTree* newDest = gtNewObjNode(clsHnd, destAddr); - if (newDest->OperGet() == GT_OBJ) - { - gtSetObjGcInfo(newDest->AsObj()); - } - dest = newDest; - } - tree->gtOp.gtOp1 = dest; - } + dest = fgMorphBlockOperand(dest, dest->TypeGet(), blockWidth, true); + tree->gtOp.gtOp1 = dest; + tree->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); } else { @@ -9068,9 +9234,18 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest) if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI()) { unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue(); - blkNode->AsDynBlk()->gtDynamicSize = nullptr; - blkNode->ChangeOper(GT_BLK); - blkNode->gtBlkSize = size; + // A GT_BLK with size of zero is not supported, + // so if we encounter such a thing we just leave it as a GT_DYN_BLK + if (size != 0) + { + blkNode->AsDynBlk()->gtDynamicSize = nullptr; + blkNode->ChangeOper(GT_BLK); + blkNode->gtBlkSize = size; + } + else + { + return tree; + } } else { @@ -9104,7 +9279,7 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest) // // Notes: // This does the following: -// - Ensures that a struct operand is a block node. +// - Ensures that a struct operand is a block node or (for non-LEGACY_BACKEND) lclVar. // - Ensures that any COMMAs are above ADDR nodes. // Although 'tree' WAS an operand of a block assignment, the assignment // may have been retyped to be a scalar assignment. @@ -9113,10 +9288,6 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne { GenTree* effectiveVal = tree->gtEffectiveVal(); - // TODO-1stClassStucts: We would like to transform non-TYP_STRUCT nodes to - // either plain lclVars or GT_INDs. However, for now we want to preserve most - // of the block nodes until the Rationalizer. 
- if (!varTypeIsStruct(asgType)) { if (effectiveVal->OperIsIndir()) @@ -9143,69 +9314,141 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne } else { + GenTreeIndir* indirTree = nullptr; + GenTreeLclVarCommon* lclNode = nullptr; + bool needsIndirection = true; + + if (effectiveVal->OperIsIndir()) + { + indirTree = effectiveVal->AsIndir(); + GenTree* addr = effectiveVal->AsIndir()->Addr(); + if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) + { + lclNode = addr->gtGetOp1()->AsLclVarCommon(); + } + } + else if (effectiveVal->OperGet() == GT_LCL_VAR) + { + lclNode = effectiveVal->AsLclVarCommon(); + } #ifdef FEATURE_SIMD if (varTypeIsSIMD(asgType)) { - if (effectiveVal->OperIsIndir()) + if ((indirTree != nullptr) && (lclNode == nullptr) && (indirTree->Addr()->OperGet() == GT_ADDR) && + (indirTree->Addr()->gtGetOp1()->gtOper == GT_SIMD)) { - GenTree* addr = effectiveVal->AsIndir()->Addr(); - if (!isDest && (addr->OperGet() == GT_ADDR)) - { - if ((addr->gtGetOp1()->gtOper == GT_SIMD) || (addr->gtGetOp1()->OperGet() == GT_LCL_VAR)) - { - effectiveVal = addr->gtGetOp1(); - } - } - else if (isDest && !effectiveVal->OperIsBlk()) - { - effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth); - } + assert(!isDest); + needsIndirection = false; + effectiveVal = indirTree->Addr()->gtGetOp1(); } - else if (!effectiveVal->OperIsSIMD() && (!effectiveVal->IsLocal() || isDest) && !effectiveVal->OperIsBlk()) + if (effectiveVal->OperIsSIMD()) { - GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); - effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth); + needsIndirection = false; } } - else #endif // FEATURE_SIMD - if (!effectiveVal->OperIsBlk()) + if (lclNode != nullptr) + { + LclVarDsc* varDsc = &(lvaTable[lclNode->gtLclNum]); + if (varTypeIsStruct(varDsc) && (varDsc->lvExactSize == blockWidth)) + { +#ifndef LEGACY_BACKEND + effectiveVal = lclNode; + needsIndirection = false; +#endif // !LEGACY_BACKEND + } + else + { + // This may be a lclVar that was determined to be address-exposed. + effectiveVal->gtFlags |= (lclNode->gtFlags & GTF_ALL_EFFECT); + } + } + if (needsIndirection) { - GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); - CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); - GenTree* newTree; - if (clsHnd == NO_CLASS_HANDLE) + if (indirTree != nullptr) { - newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth); + // We should never find a struct indirection on the lhs of an assignment. + assert(!isDest || indirTree->OperIsBlk()); + if (!isDest && indirTree->OperIsBlk()) + { + (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); + } } else { - newTree = gtNewObjNode(clsHnd, addr); - if (isDest && (newTree->OperGet() == GT_OBJ)) + GenTree* newTree; + GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal); + if (isDest) { - gtSetObjGcInfo(newTree->AsObj()); + CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal); + if (clsHnd == NO_CLASS_HANDLE) + { + newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth); + } + else + { + newTree = gtNewObjNode(clsHnd, addr); + if (isDest && (newTree->OperGet() == GT_OBJ)) + { + gtSetObjGcInfo(newTree->AsObj()); + } + if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0)) + { + // This is not necessarily a global reference, though gtNewObjNode always assumes it is. 
+ // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor, + // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled + // separately now to avoid excess diffs. + newTree->gtFlags &= ~(GTF_GLOB_EFFECT); + } + } } - if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0)) + else { - // This is not necessarily a global reference, though gtNewObjNode always assumes it is. - // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor, - // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled - // separately now to avoid excess diffs. - newTree->gtFlags &= ~(GTF_GLOB_EFFECT); + newTree = new (this, GT_IND) GenTreeIndir(GT_IND, asgType, addr, nullptr); } + effectiveVal = newTree; } - effectiveVal = newTree; } } - if (!isDest && effectiveVal->OperIsBlk()) - { - (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); - } tree = effectiveVal; return tree; } //------------------------------------------------------------------------ +// fgMorphUnsafeBlk: Convert a CopyObj with a dest on the stack to a GC Unsafe CopyBlk +// +// Arguments: +// dest - the GT_OBJ or GT_STORE_OBJ +// +// Assumptions: +// The destination must be known (by the caller) to be on the stack. +// +// Notes: +// If we have a CopyObj with a dest on the stack, and its size is small enough +// to be completely unrolled (i.e. between [16..64] bytes), we will convert it into a +// GC Unsafe CopyBlk that is non-interruptible. +// This is not supported for the JIT32_GCENCODER, in which case this method is a no-op. +// +void Compiler::fgMorphUnsafeBlk(GenTreeObj* dest) +{ +#if defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) + assert(dest->gtGcPtrCount != 0); + unsigned blockWidth = dest->AsBlk()->gtBlkSize; +#ifdef DEBUG + bool destOnStack = false; + GenTree* destAddr = dest->Addr(); + assert(destAddr->IsLocalAddrExpr() != nullptr); +#endif + if ((blockWidth >= (2 * TARGET_POINTER_SIZE)) && (blockWidth <= CPBLK_UNROLL_LIMIT)) + { + genTreeOps newOper = (dest->gtOper == GT_OBJ) ? GT_BLK : GT_STORE_BLK; + dest->SetOper(newOper); + dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block + } +#endif // defined(CPBLK_UNROLL_LIMIT) && !defined(JIT32_GCENCODER) +} + +//------------------------------------------------------------------------ // fgMorphCopyBlock: Perform the Morphing of block copy // // Arguments: @@ -9444,6 +9687,14 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) bool requiresCopyBlock = false; bool srcSingleLclVarAsg = false; + if ((destLclVar != nullptr) && (srcLclVar == destLclVar)) + { + // Beyond perf reasons, it is not prudent to have a copy of a struct to itself. + GenTree* nop = gtNewNothingNode(); + INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return nop; + } + // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock. if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct)) { @@ -9485,12 +9736,19 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) // Are both dest and src promoted structs? if (destDoFldAsg && srcDoFldAsg) { - // Both structs should be of the same type, if not we will use a copy block + // Both structs should be of the same type, or each have a single field of the same type. + // If not we will use a copy block.
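The promoted-struct paths being adjusted here replace a block copy with one scalar assignment per promoted field (now also permitted when the two structs differ but each has a single field of the same type). At the source level the fieldwise rewrite looks like this:

struct Point
{
    float x;
    float y;
};

// Instead of an 8-byte block copy, each promoted field is copied as a
// scalar, so both fields can live in registers.
void CopyFieldwise(Point& dst, const Point& src)
{
    dst.x = src.x; // field 0
    dst.y = src.y; // field 1
}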
if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() != lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle()) { - requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock - JITDUMP(" with mismatched types"); + unsigned destFieldNum = lvaTable[destLclNum].lvFieldLclStart; + unsigned srcFieldNum = lvaTable[srcLclNum].lvFieldLclStart; + if ((lvaTable[destLclNum].lvFieldCnt != 1) || (lvaTable[srcLclNum].lvFieldCnt != 1) || + (lvaTable[destFieldNum].lvType != lvaTable[srcFieldNum].lvType)) + { + requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock + JITDUMP(" with mismatched types"); + } } } // Are neither dest or src promoted structs? @@ -9584,34 +9842,24 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) var_types asgType = dest->TypeGet(); dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); asg->gtOp.gtOp1 = dest; - hasGCPtrs = ((dest->OperGet() == GT_OBJ) && (dest->AsObj()->gtGcPtrCount != 0)); + asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); -#ifdef CPBLK_UNROLL_LIMIT // Note that the unrolling of CopyBlk is only implemented on some platforms. - // Currently that includes x64 and Arm64 but not x64 or Arm32. + // Currently that includes x64 and ARM but not x86: the code generation for this + // construct requires the ability to mark certain regions of the generated code + // as non-interruptible, and the GC encoding for the latter platform does not + // have this capability. // If we have a CopyObj with a dest on the stack // we will convert it into a GC Unsafe CopyBlk that is non-interruptible - // when its size is small enouch to be completely unrolled (i.e. between [16..64] bytes) + // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes). + // (This is not supported for the JIT32_GCENCODER, for which fgMorphUnsafeBlk is a no-op.) // - if (hasGCPtrs && destOnStack && blockWidthIsConst && (blockWidth >= (2 * TARGET_POINTER_SIZE)) && - (blockWidth <= CPBLK_UNROLL_LIMIT)) + if (destOnStack && (dest->OperGet() == GT_OBJ)) { - if (dest->OperGet() == GT_OBJ) - { - dest->SetOper(GT_BLK); - dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block - } - else - { - assert(dest->OperIsLocal()); - GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest); - dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth); - dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block - tree->gtOp.gtOp1 = dest; - } + fgMorphUnsafeBlk(dest->AsObj()); } -#endif + // Eliminate the "OBJ or BLK" node on the rhs. rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/); asg->gtOp.gtOp2 = rhs; @@ -9659,8 +9907,6 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree) // To do fieldwise assignments for both sides, they'd better be the same struct type! // All of these conditions were checked above...
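fgMorphUnsafeBlk, now called above for a stack destination, exists because an unrolled CopyObj writes GC references one word at a time. The sketch below shows the intermediate state that forces the unrolled region to be non-interruptible: between the two stores the destination holds a mix of old and new references, which a GC arriving mid-sequence could misreport.

struct Pair
{
    void* first;  // imagine both fields hold object references
    void* second;
};

void UnrolledCopy(Pair* dst, const Pair* src)
{
    dst->first = src->first;
    // <-- a GC suspension here would observe dst half-updated, which is
    //     why the JIT marks the whole unrolled sequence GC-unsafe.
    dst->second = src->second;
}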
assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM); - assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() == - lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle()); assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt); fieldCnt = destLclVar->lvFieldCnt; @@ -10354,23 +10600,12 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) /* fgDoNormalizeOnStore can change op2 */ noway_assert(op1 == tree->gtOp.gtOp1); op2 = tree->gtOp.gtOp2; - // TODO-1stClassStructs: this is here to match previous behavior, but results in some - // unnecessary pessimization in the handling of addresses in fgMorphCopyBlock(). - if (tree->OperIsBlkOp()) - { - op1->gtFlags |= GTF_DONT_CSE; - if (tree->OperIsCopyBlkOp() && - (op2->IsLocal() || (op2->OperIsIndir() && (op2->AsIndir()->Addr()->OperGet() == GT_ADDR)))) - { - op2->gtFlags |= GTF_DONT_CSE; - } - } #ifdef FEATURE_SIMD { // We should check whether op2 should be assigned to a SIMD field or not. // If it is, we should translate the tree to simd intrinsic. - assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0); + assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree); typ = tree->TypeGet(); op1 = tree->gtGetOp1(); @@ -10451,8 +10686,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) case GT_COLON: #if LOCAL_ASSERTION_PROP if (optLocalAssertionProp) - { #endif + { isQmarkColon = true; } break; @@ -10608,13 +10843,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) { op2 = gtFoldExprConst(op2); } - - if (fgShouldUseMagicNumberDivide(tree->AsOp())) - { - tree = fgMorphDivByConst(tree->AsOp()); - op1 = tree->gtOp.gtOp1; - op2 = tree->gtOp.gtOp2; - } #endif // !LEGACY_BACKEND break; @@ -10673,44 +10901,44 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization // #else // _TARGET_XARCH - /* If this is an unsigned long mod with op2 which is a cast to long from a constant int, then don't morph to a call to the helper. This can be done faster inline using idiv.
+ */ - noway_assert(op2); - if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && - ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && - ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) - { - if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT && - op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 && - op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff && - (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED)) - { - tree->gtOp.gtOp2 = op2 = fgMorphCast(op2); - noway_assert(op2->gtOper == GT_CNS_NATIVELONG); - } + noway_assert(op2); + if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) && + ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) && + ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED))) + { + if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT && + op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 && + op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff && + (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED)) + { + tree->gtOp.gtOp2 = op2 = fgMorphCast(op2); + noway_assert(op2->gtOper == GT_CNS_NATIVELONG); + } - if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 && - op2->gtIntConCommon.LngValue() <= 0x3fffffff) - { - tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); - noway_assert(op1->TypeGet() == TYP_LONG); + if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 && + op2->gtIntConCommon.LngValue() <= 0x3fffffff) + { + tree->gtOp.gtOp1 = op1 = fgMorphTree(op1); + noway_assert(op1->TypeGet() == TYP_LONG); - // Update flags for op1 morph - tree->gtFlags &= ~GTF_ALL_EFFECT; + // Update flags for op1 morph + tree->gtFlags &= ~GTF_ALL_EFFECT; - tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant + tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant - // If op1 is a constant, then do constant folding of the division operator - if (op1->gtOper == GT_CNS_NATIVELONG) - { - tree = gtFoldExpr(tree); + // If op1 is a constant, then do constant folding of the division operator + if (op1->gtOper == GT_CNS_NATIVELONG) + { + tree = gtFoldExpr(tree); + } + return tree; } - return tree; } - } #endif // _TARGET_XARCH ASSIGN_HELPER_FOR_MOD: @@ -10773,16 +11001,28 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) tree = fgMorphModToSubMulDiv(tree->AsOp()); op1 = tree->gtOp.gtOp1; op2 = tree->gtOp.gtOp2; - -#else // !_TARGET_ARM64_ - - if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp())) - { - tree = fgMorphModByConst(tree->AsOp()); - op1 = tree->gtOp.gtOp1; - op2 = tree->gtOp.gtOp2; +#else //_TARGET_ARM64_ + // If b is not a power of 2 constant then lowering replaces a % b + // with a - (a / b) * b and applies magic division optimization to + // a / b. The code may already contain an a / b expression (e.g. + // x = a / 10; y = a % 10;) and then we end up with redundant code. + // If we convert % to / here we give CSE the opportunity to eliminate + // the redundant division. If there's no redundant division then + // nothing is lost, lowering would have done this transform anyway. + + if ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()) + { + ssize_t divisorValue = op2->AsIntCon()->IconValue(); + size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? 
static_cast<size_t>(divisorValue) + : static_cast<size_t>(abs(divisorValue)); + + if (!isPow2(absDivisorValue)) + { + tree = fgMorphModToSubMulDiv(tree->AsOp()); + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + } } - #endif //_TARGET_ARM64_ #endif // !LEGACY_BACKEND break; @@ -10857,12 +11097,12 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) || (op2->gtCall.gtCallType == CT_HELPER))) #else - if ((((op1->gtOper == GT_INTRINSIC) && - (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || - ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) && - (((op2->gtOper == GT_INTRINSIC) && - (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || - ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER)))) + if ((((op1->gtOper == GT_INTRINSIC) && + (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || + ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) && + (((op2->gtOper == GT_INTRINSIC) && + (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) || + ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER)))) #endif { GenTreePtr pGetClassFromHandle; @@ -10872,8 +11112,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1); bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2); #else - bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false; - bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false; + bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false; + bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false; #endif // Optimize typeof(...) == typeof(...) @@ -10929,8 +11169,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType && #else - if ((pGetType->gtOper == GT_INTRINSIC) && - (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) && + if ((pGetType->gtOper == GT_INTRINSIC) && + (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) && #endif pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL) { @@ -10944,7 +11184,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) #ifdef LEGACY_BACKEND GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp); #else - GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1); + GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1); #endif objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null compCurBB->bbFlags |= BBF_HAS_VTABREF; @@ -11041,7 +11281,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // Assume it's an Ind context to start. 
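The GT_MOD change earlier in this part of the diff (rewriting a % b as a - (a / b) * b during morph when b is a non-power-of-2 constant) is easiest to see at the source level. The point made in its comment is that the method may already contain the matching division, and exposing the division inside the remainder lets CSE keep a single magic-number divide. A rough, illustrative-only sketch of the before/after shapes:

    int before(int a)
    {
        int q = a / 10;
        int r = a % 10;            // would otherwise expand into a second divide in lowering
        return q + r;
    }

    int after_morph(int a)
    {
        int q = a / 10;
        int r = a - (a / 10) * 10; // same (a / 10) tree as 'q'; CSE can now share it
        return q + r;
    }

If no matching division exists, nothing is lost: lowering performs the same expansion for non-power-of-2 divisors anyway.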
MorphAddrContext subIndMac1(MACK_Ind); MorphAddrContext* subMac1 = mac; - if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock) + if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind) { switch (tree->gtOper) { @@ -11532,7 +11772,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // EQ/NE // / \ - // op1 CNS 0/1 + // op1 CNS 0/1 // ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1 @@ -11557,11 +11797,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // EQ/NE Possible REVERSE(RELOP) // / \ / \ - // COMMA CNS 0/1 -> COMMA relop_op2 + // COMMA CNS 0/1 -> COMMA relop_op2 // / \ / \ - // x RELOP x relop_op1 + // x RELOP x relop_op1 // / \ - // relop_op1 relop_op2 + // relop_op1 relop_op2 // // // @@ -11600,13 +11840,13 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // EQ/NE EQ/NE // / \ / \ - // COMMA CNS 0/1 -> RELOP CNS 0/1 + // COMMA CNS 0/1 -> RELOP CNS 0/1 // / \ / \ - // ASG LCL_VAR + // ASG LCL_VAR // / \ - // LCL_VAR RELOP + // LCL_VAR RELOP // / \ - // + // GenTreePtr asg = op1->gtOp.gtOp1; GenTreePtr lcl = op1->gtOp.gtOp2; @@ -11689,9 +11929,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // EQ/NE -> RELOP/!RELOP // / \ / \ - // RELOP CNS 0/1 + // RELOP CNS 0/1 // / \ - // + // // Note that we will remove/destroy the EQ/NE node and move // the RELOP up into it's location. @@ -11721,11 +11961,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // // EQ/NE EQ/NE // / \ / \ - // AND CNS 0/1 -> AND CNS 0 + // AND CNS 0/1 -> AND CNS 0 // / \ / \ - // RSZ/RSH CNS 1 x CNS (1 << y) + // RSZ/RSH CNS 1 x CNS (1 << y) // / \ - // x CNS_INT +y + // x CNS_INT +y if (op1->gtOper == GT_AND) { @@ -12121,38 +12361,42 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) goto CM_OVF_OP; } - /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ - - noway_assert(op2); - if (op2->IsCnsIntOrI()) + // TODO #4104: there are a lot of other places where + // this condition is not checked before transformations. + if (fgGlobalMorph) { - /* Negate the constant and change the node to be "+" */ + /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */ - op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue()); - oper = GT_ADD; - tree->ChangeOper(oper); - goto CM_ADD_OP; - } + noway_assert(op2); + if (op2->IsCnsIntOrI()) + { + /* Negate the constant and change the node to be "+" */ - /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ + op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue()); + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } - noway_assert(op1); - if (op1->IsCnsIntOrI()) - { - noway_assert(varTypeIsIntOrI(tree)); + /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */ - tree->gtOp.gtOp2 = op2 = - gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same - // as the type of the tree, i.e. tree->gtType. - fgMorphTreeDone(op2); + noway_assert(op1); + if (op1->IsCnsIntOrI()) + { + noway_assert(varTypeIsIntOrI(tree)); - oper = GT_ADD; - tree->ChangeOper(oper); - goto CM_ADD_OP; - } + tree->gtOp.gtOp2 = op2 = gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG + // node should be the same + // as the type of the tree, i.e. tree->gtType. 
+ fgMorphTreeDone(op2); - /* No match - exit */ + oper = GT_ADD; + tree->ChangeOper(oper); + goto CM_ADD_OP; + } + /* No match - exit */ + } break; #ifdef _TARGET_ARM64_ @@ -12281,7 +12525,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // Dereferencing the pointer in either case will have the // same effect. - if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet())) + if (!optValnumCSE_phase && varTypeIsGC(op2->TypeGet()) && + ((op1->gtFlags & GTF_ALL_EFFECT) == 0)) { op2->gtType = tree->gtType; DEBUG_DESTROY_NODE(op1); @@ -12520,7 +12765,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // Also make sure that the tree type matches the fieldVarType and that it's lvFldOffset // is zero - if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0)) + if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0)) { // We can just use the existing promoted field LclNum temp->gtLclVarCommon.SetLclNum(lclNumFld); @@ -12538,8 +12783,8 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) && !lvaTable[lclNum].lvNormalizeOnLoad()) { - tree->gtType = temp->gtType; - foldAndReturnTemp = true; + tree->gtType = typ = temp->TypeGet(); + foldAndReturnTemp = true; } else { @@ -12554,7 +12799,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // Append the field sequence, change the type. temp->AsLclFld()->gtFieldSeq = GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq); - temp->gtType = tree->TypeGet(); + temp->gtType = typ; foldAndReturnTemp = true; } @@ -12623,9 +12868,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) #ifdef _TARGET_ARM_ // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field // - if (varTypeIsFloating(tree->TypeGet())) + if (varTypeIsFloating(typ)) { - if ((ival1 % emitTypeSize(tree->TypeGet())) != 0) + if ((ival1 % emitTypeSize(typ)) != 0) { tree->gtFlags |= GTF_IND_UNALIGNED; break; @@ -12638,24 +12883,35 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) } } -#ifdef DEBUG - // If we have decided to fold, then temp cannot be nullptr - if (foldAndReturnTemp) - { - assert(temp != nullptr); - } -#endif - - if (temp != nullptr) - { - noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR); - - // If we haven't already decided to fold this expression - // - if (!foldAndReturnTemp) + // At this point we may have a lclVar or lclFld that might be foldable with a bit of extra massaging: + // - We may have a load of a local where the load has a different type than the local + // - We may have a load of a local plus an offset + // + // In these cases, we will change the lclVar or lclFld into a lclFld of the appropriate type and + // offset if doing so is legal. The only cases in which this transformation is illegal are if the load + // begins before the local or if the load extends beyond the end of the local (i.e. if the load is + // out-of-bounds w.r.t. the local). + if ((temp != nullptr) && !foldAndReturnTemp) + { + assert(temp->OperIsLocal()); + + const unsigned lclNum = temp->AsLclVarCommon()->gtLclNum; + LclVarDsc* const varDsc = &lvaTable[lclNum]; + + const var_types tempTyp = temp->TypeGet(); + const bool useExactSize = + varTypeIsStruct(tempTyp) || (tempTyp == TYP_BLK) || (tempTyp == TYP_LCLBLK); + const unsigned varSize = useExactSize ? 
varDsc->lvExactSize : genTypeSize(temp); + + // If the size of the load is greater than the size of the lclVar, we cannot fold this access into + // a lclFld: the access represented by an lclFld node must begin at or after the start of the + // lclVar and must not extend beyond the end of the lclVar. + if ((ival1 < 0) || ((ival1 + genTypeSize(typ)) > varSize)) + { + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); + } + else { - noway_assert(temp->OperIsLocal()); - LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]); // Make sure we don't separately promote the fields of this struct. if (varDsc->lvRegStruct) { @@ -12664,7 +12920,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) } else { - lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField)); + lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField)); } // We will turn a GT_LCL_VAR into a GT_LCL_FLD with an gtLclOffs of 'ival' @@ -12689,19 +12945,19 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) temp->gtType = tree->gtType; foldAndReturnTemp = true; } + } - assert(foldAndReturnTemp == true); + if (foldAndReturnTemp) + { + assert(temp != nullptr); + assert(temp->TypeGet() == typ); + assert((op1->OperGet() == GT_ADD) || (op1->OperGet() == GT_ADDR)); - // Keep the DONT_CSE flag in sync - // (i.e keep the original value of this flag from tree) - // as it can be set for 'temp' because a GT_ADDR always marks it for it's op1 - // + // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for + // 'temp' because a GT_ADDR always marks it for its operand. temp->gtFlags &= ~GTF_DONT_CSE; temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE); - noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR); - noway_assert(temp->gtType == tree->gtType); - if (op1->OperGet() == GT_ADD) { DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR @@ -12984,7 +13240,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac) // If we are in the Valuenum CSE phase then don't morph away anything as these // nodes may have CSE defs/uses in them. // - if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList()) + if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) { /* Check for op1 as a GT_COMMA with a unconditional throw node */ if (op1 && fgIsCommaThrow(op1, true)) @@ -13530,6 +13786,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) /* The target is used as well as being defined */ if (op1->OperIsLocal()) { + op1->gtFlags &= ~GTF_VAR_USEDEF; op1->gtFlags |= GTF_VAR_USEASG; } @@ -13666,7 +13923,7 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) /* Check for the case "(val + icon) << icon" */ - if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) + if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow()) { GenTreePtr cns = op1->gtOp.gtOp2; @@ -13731,192 +13988,45 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) break; + case GT_INIT_VAL: + // Initialization values for initBlk have special semantics - their lower + // byte is used to fill the struct. However, we allow 0 as a "bare" value, + // which enables them to get a VNForZero, and be propagated. + if (op1->IsIntegralConst(0)) + { + return op1; + } + break; + default: break; } return tree; } -// code to generate a magic number and shift amount for the magic number division -// optimization. 
This code is previously from UTC where it notes it was taken from -// _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58. -// The paper it is based on is "Division by invariant integers using multiplication" -// by Torbjorn Granlund and Peter L. Montgomery in PLDI 94 - -template <typename T> -T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/) -{ - // static SMAG smag; - const int bits = sizeof(T) * 8; - const int bits_minus_1 = bits - 1; - - typedef typename jitstd::make_unsigned<T>::type UT; - - const UT two_nminus1 = UT(1) << bits_minus_1; - - int p; - UT absDenom; - UT absNc; - UT delta; - UT q1; - UT r1; - UT r2; - UT q2; - UT t; - T result_magic; - int result_shift; - int iters = 0; - - absDenom = abs(denom); - t = two_nminus1 + ((unsigned int)denom >> 31); - absNc = t - 1 - (t % absDenom); // absolute value of nc - p = bits_minus_1; // initialize p - q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc) - r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc)) - q2 = two_nminus1 / absDenom; // initialize q1 = 2^p / abs(denom) - r2 = two_nminus1 - (q2 * absDenom); // initialize r1 = rem(2^p, abs(denom)) - - do - { - iters++; - p++; - q1 *= 2; // update q1 = 2^p / abs(nc) - r1 *= 2; // update r1 = rem(2^p / abs(nc)) - - if (r1 >= absNc) - { // must be unsigned comparison - q1++; - r1 -= absNc; - } - - q2 *= 2; // update q2 = 2^p / abs(denom) - r2 *= 2; // update r2 = rem(2^p / abs(denom)) - - if (r2 >= absDenom) - { // must be unsigned comparison - q2++; - r2 -= absDenom; - } - - delta = absDenom - r2; - } while (q1 < delta || (q1 == delta && r1 == 0)); - - result_magic = q2 + 1; // resulting magic number - if (denom < 0) - { - result_magic = -result_magic; - } - *shift = p - bits; // resulting shift - - return result_magic; -} - -bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree) -{ -#ifdef _TARGET_ARM64_ - // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64 - return false; -#else - - // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do, - // if this method returns true we will introduce a new LclVar and - // a couple of new GenTree nodes, including an assignment to the new LclVar. - // None of these new GenTree nodes will have valid ValueNumbers. - // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase. - // - // Also during optAssertionProp when extracting side effects we can assert - // during gtBuildCommaList if we have one tree that has Value Numbers - // and another one that does not. 
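The functions being deleted here (GetSignedMagicNumberForDivide, fgShouldUseMagicNumberDivide and, further on, fgMorphDivByConst) implemented the Granlund-Montgomery magic-number division directly in morph; after this change that expansion is left to lowering, and morph only performs the %-to-/ rewrite noted above. As a reminder of what the magic-number technique computes, a minimal sketch for the well-known case of signed division by 3 (the constant and correction below are specific to this divisor; this is an illustration, not the removed code):

    #include <cstdint>

    int32_t div3(int32_t n)
    {
        int64_t prod = (int64_t)n * 0x55555556; // M = ceil(2^32 / 3)
        int32_t q    = (int32_t)(prod >> 32);   // take the high 32 bits of the product
        q += (uint32_t)n >> 31;                 // add 1 for negative n, truncating toward zero
        return q;                               // equals n / 3 for every int32_t n
    }

GetSignedMagicNumberForDivide produced the (magic, shift) pair for an arbitrary constant divisor; for 3 the post-multiply shift happens to be zero, which keeps the sketch short.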
- // - if (!fgGlobalMorph) - { - // We only perform the Magic Number Divide optimization during - // the initial global morph phase - return false; - } - - if (tree->gtFlags & GTF_OVERFLOW) - { - return false; - } - - if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG) - { - return false; - } - - ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue(); - - if (cons == 0 || cons == -1 || cons == 1) - { - return false; - } - - // codegen will expand these - if (cons == SSIZE_T_MIN || isPow2(abs(cons))) - { - return false; - } - - // someone else will fold this away, so don't make it complicated for them - if (tree->gtOp1->IsCnsIntOrI()) - { - return false; - } - - // There is no technical barrier to handling unsigned, however it is quite rare - // and more work to support and test - if (tree->gtFlags & GTF_UNSIGNED) - { - return false; - } - - return true; -#endif -} - -// transform x%c -> x-((x/c)*c) - -GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree) -{ - assert(fgShouldUseMagicNumberDivide(tree)); - - var_types type = tree->gtType; - - GenTree* cns = tree->gtOp2; - - GenTree* numerator = fgMakeMultiUse(&tree->gtOp1); - - tree->SetOper(GT_DIV); - - GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns)); - - GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul); - -#ifdef DEBUG - sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - - return sub; -} - -// For ARM64 we don't have a remainder instruction, -// The architecture manual suggests the following transformation to -// generate code for such operator: +//------------------------------------------------------------------------ +// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b +// (see ECMA III 3.55 and III.3.56). // -// a % b = a - (a / b) * b; +// Arguments: +// tree - The GT_MOD/GT_UMOD tree to morph // -// This method will produce the above expression in 'a' and 'b' are -// leaf nodes, otherwise, if any of them is not a leaf it will spill -// its value into a temporary variable, an example: -// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) +// Returns: +// The morphed tree +// +// Notes: +// For ARM64 we don't have a remainder instruction so this transform is +// always done. For XARCH this transform is done if we know that magic +// division will be used, in that case this transform allows CSE to +// eliminate the redundant div from code like "x = a / 3; y = a % 3;". +// +// This method will produce the above expression in 'a' and 'b' are +// leaf nodes, otherwise, if any of them is not a leaf it will spill +// its value into a temporary variable, an example: +// (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) ) // GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) { -#ifndef _TARGET_ARM64_ - assert(!"This should only be called for ARM64"); -#endif - if (tree->OperGet() == GT_MOD) { tree->SetOper(GT_DIV); @@ -13944,8 +14054,16 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) denominator = fgMakeMultiUse(&tree->gtOp2); } + // The numerator and denominator may have been assigned to temps, in which case + // their defining assignments are in the current tree. Therefore, we need to + // set the execuction order accordingly on the nodes we create. + // That is, the "mul" will be evaluated in "normal" order, and the "sub" must + // be set to be evaluated in reverse order. 
+ // GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator)); + assert(!mul->IsReverseOp()); GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul); + sub->gtFlags |= GTF_REVERSE_OPS; #ifdef DEBUG sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; @@ -13954,95 +14072,6 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree) return sub; } -// Turn a division by a constant into a multiplication by constant + some adjustments -// see comments on GetSignedMagicNumberForDivide for source of this algorithm. -// returns: the transformed tree - -GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree) -{ - assert(fgShouldUseMagicNumberDivide(tree)); - - JITDUMP("doing magic number divide optimization\n"); - - int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue(); - int64_t magic; - int shift; - var_types type = tree->gtType; - - if (tree->gtType == TYP_INT) - { - magic = GetSignedMagicNumberForDivide<int32_t>((int32_t)denominator, &shift); - } - else - { - magic = GetSignedMagicNumberForDivide<int64_t>((int64_t)denominator, &shift); - } - - GenTree* numerator = nullptr; - - // If signs of the denominator and magic number don't match, - // we will need to use the numerator again. - if (signum(denominator) != signum(magic)) - { - numerator = fgMakeMultiUse(&tree->gtOp1); - tree->gtFlags |= GTF_ASG; - } - - if (type == TYP_LONG) - { - tree->gtOp2->gtIntConCommon.SetLngValue(magic); - } - else - { - tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic); - } - - tree->SetOper(GT_MULHI); - - GenTree* t = tree; - GenTree* mulresult = tree; - - JITDUMP("Multiply Result:\n"); - DISPTREE(mulresult); - - GenTree* adjusted = mulresult; - - if (denominator > 0 && magic < 0) - { - // add the numerator back in - adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator); - } - else if (denominator < 0 && magic > 0) - { - // subtract the numerator off - adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator); - } - else - { - adjusted = mulresult; - } - - GenTree* result1 = adjusted; - if (shift != 0) - { - result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT)); - } - - GenTree* secondClone = fgMakeMultiUse(&result1); - - GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type)); - - GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2); - JITDUMP("Final Magic Number divide:\n"); - DISPTREE(result); - -#ifdef DEBUG - result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - - return result; -} - //------------------------------------------------------------------------------ // fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree. // @@ -14238,10 +14267,10 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree) #ifndef _TARGET_64BIT_ if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64)) { - // TODO: we need to handle variable-sized long shifts specially on x86. + // TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86. // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need // to add helpers for GT_ROL and GT_ROR. 
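For context on the surrounding function: fgRecognizeAndMorphBitwiseRotation looks for an OR/XOR of two shifts of the same value whose shift counts add up to the operand's width and rewrites the tree into a single GT_ROL/GT_ROR; the change in this hunk makes the unsupported case (a 64-bit rotation by a variable amount on a 32-bit target) return the tree unchanged instead of hitting the NYI assert. The shape being recognized, sketched for the 32-bit case (valid here for shift counts 1..31):

    #include <cstdint>

    uint32_t rol32(uint32_t x, unsigned c)
    {
        return (x << c) | (x >> (32 - c)); // counts sum to 32, so morph emits one GT_ROL
    }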
- NYI("Rotation of a long value by variable amount"); + return tree; } #endif @@ -14276,7 +14305,15 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree) tree->gtOp.gtOp1 = rotatedValue; tree->gtOp.gtOp2 = rotateIndex; tree->ChangeOper(rotateOp); - noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT)); + + unsigned childFlags = 0; + for (GenTree* op : tree->Operands()) + { + childFlags |= (op->gtFlags & GTF_ALL_EFFECT); + } + + // The parent's flags should be a superset of its operands' flags + noway_assert((inputTreeEffects & childFlags) == childFlags); } else { @@ -14719,29 +14756,15 @@ DONE: } #if LOCAL_ASSERTION_PROP -/***************************************************************************** - * - * Kill all dependent assertions with regard to lclNum. - * - */ - -void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree)) +//------------------------------------------------------------------------ +// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum +// +// Arguments: +// lclNum - The varNum of the lclVar for which we're killing assertions. +// tree - (DEBUG only) the tree responsible for killing its assertions. +// +void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTreePtr tree)) { - LclVarDsc* varDsc = &lvaTable[lclNum]; - - if (varDsc->lvPromoted) - { - noway_assert(varTypeIsStruct(varDsc)); - - // Kill the field locals. - for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) - { - fgKillDependentAssertions(i DEBUGARG(tree)); - } - - // Fall through to kill the struct local itself. - } - /* All dependent assertions are killed here */ ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum)); @@ -14778,6 +14801,48 @@ void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tre noway_assert(BitVecOps::IsEmpty(apTraits, killed)); } } +//------------------------------------------------------------------------ +// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum. +// +// Arguments: +// lclNum - The varNum of the lclVar for which we're killing assertions. +// tree - (DEBUG only) the tree responsible for killing its assertions. +// +// Notes: +// For structs and struct fields, it will invalidate the children and parent +// respectively. +// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar. +// +void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree)) +{ + LclVarDsc* varDsc = &lvaTable[lclNum]; + + if (varDsc->lvPromoted) + { + noway_assert(varTypeIsStruct(varDsc)); + + // Kill the field locals. + for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) + { + fgKillDependentAssertionsSingle(i DEBUGARG(tree)); + } + + // Kill the struct local itself. + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + } + else if (varDsc->lvIsStructField) + { + // Kill the field local. + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + + // Kill the parent struct. 
+ fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree)); + } + else + { + fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree)); + } +} #endif // LOCAL_ASSERTION_PROP /***************************************************************************** @@ -14841,13 +14906,12 @@ void Compiler::fgMorphTreeDone(GenTreePtr tree, if (optAssertionCount > 0) { /* Is this an assignment to a local variable */ - - if ((tree->OperKind() & GTK_ASGOP) && - (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD)) + GenTreeLclVarCommon* lclVarTree = nullptr; + if (tree->DefinesLocal(this, &lclVarTree)) { - unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum; - noway_assert(op1LclNum < lvaCount); - fgKillDependentAssertions(op1LclNum DEBUGARG(tree)); + unsigned lclNum = lclVarTree->gtLclNum; + noway_assert(lclNum < lvaCount); + fgKillDependentAssertions(lclNum DEBUGARG(tree)); } } @@ -15223,14 +15287,15 @@ bool Compiler::fgFoldConditional(BasicBlock* block) // Returns false if 'stmt' is still in the block (even if other statements were removed). // -bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg)) +bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreeStmt* stmt DEBUGARG(const char* msg)) { - noway_assert(stmt->gtOper == GT_STMT); + assert(block != nullptr); + assert(stmt != nullptr); compCurBB = block; compCurStmt = stmt; - GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr); + GenTree* morph = fgMorphTree(stmt->gtStmtExpr); // Bug 1106830 - During the CSE phase we can't just remove // morph->gtOp.gtOp2 as it could contain CSE expressions. @@ -15239,7 +15304,7 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons // if (!optValnumCSE_phase) { - /* Check for morph as a GT_COMMA with an unconditional throw */ + // Check for morph as a GT_COMMA with an unconditional throw if (fgIsCommaThrow(morph, true)) { #ifdef DEBUG @@ -15251,12 +15316,12 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons printf("\n"); } #endif - /* Use the call as the new stmt */ + // Use the call as the new stmt morph = morph->gtOp.gtOp1; noway_assert(morph->gtOper == GT_CALL); } - /* we can get a throw as a statement root*/ + // we can get a throw as a statement root if (fgIsThrow(morph)) { #ifdef DEBUG @@ -15271,15 +15336,19 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons } } - stmt->gtStmt.gtStmtExpr = morph; + stmt->gtStmtExpr = morph; - /* Can the entire tree be removed ? */ + if (lvaLocalVarRefCounted) + { + // fgMorphTree may have introduced new lclVar references. Bump the ref counts if requested. + lvaRecursiveIncRefCounts(stmt->gtStmtExpr); + } + // Can the entire tree be removed? bool removedStmt = fgCheckRemoveStmt(block, stmt); - /* Or this is the last statement of a conditional branch that was just folded */ - - if ((!removedStmt) && (stmt->gtNext == nullptr) && !fgRemoveRestOfBlock) + // Or this is the last statement of a conditional branch that was just folded? 
+ if (!removedStmt && (stmt->getNextStmt() == nullptr) && !fgRemoveRestOfBlock) { if (fgFoldConditional(block)) { @@ -15292,11 +15361,10 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons if (!removedStmt) { - /* Have to re-do the evaluation order since for example - * some later code does not expect constants as op1 */ + // Have to re-do the evaluation order since for example some later code does not expect constants as op1 gtSetStmtInfo(stmt); - /* Have to re-link the nodes for this statement */ + // Have to re-link the nodes for this statement fgSetStmtSeq(stmt); } @@ -15311,18 +15379,13 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(cons if (fgRemoveRestOfBlock) { - /* Remove the rest of the stmts in the block */ - - while (stmt->gtNext) + // Remove the rest of the stmts in the block + for (stmt = stmt->getNextStmt(); stmt != nullptr; stmt = stmt->getNextStmt()) { - stmt = stmt->gtNext; - noway_assert(stmt->gtOper == GT_STMT); - fgRemoveStmt(block, stmt); } - // The rest of block has been removed - // and we will always throw an exception + // The rest of block has been removed and we will always throw an exception. // Update succesors of block fgRemoveBlockAsPred(block); @@ -15368,8 +15431,9 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa fgCurrentlyInUseArgTemps = hashBv::Create(this); - GenTreePtr stmt, prev; - for (stmt = block->bbTreeList, prev = nullptr; stmt; prev = stmt->gtStmt.gtStmtExpr, stmt = stmt->gtNext) + GenTreeStmt* stmt = block->firstStmt(); + GenTreePtr prev = nullptr; + for (; stmt != nullptr; prev = stmt->gtStmtExpr, stmt = stmt->gtNextStmt) { noway_assert(stmt->gtOper == GT_STMT); @@ -15379,8 +15443,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa continue; } #ifdef FEATURE_SIMD - if (!opts.MinOpts() && stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT && - stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG) + if (!opts.MinOpts() && stmt->gtStmtExpr->TypeGet() == TYP_FLOAT && stmt->gtStmtExpr->OperGet() == GT_ASG) { fgMorphCombineSIMDFieldAssignments(block, stmt); } @@ -15388,7 +15451,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa fgMorphStmt = stmt; compCurStmt = stmt; - GenTreePtr tree = stmt->gtStmt.gtStmtExpr; + GenTreePtr tree = stmt->gtStmtExpr; #ifdef DEBUG compCurStmtNum++; @@ -15416,15 +15479,15 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa // Has fgMorphStmt been sneakily changed ? - if (stmt->gtStmt.gtStmtExpr != tree) + if (stmt->gtStmtExpr != tree) { /* This must be tailcall. 
Ignore 'morph' and carry on with the tail-call node */ - morph = stmt->gtStmt.gtStmtExpr; + morph = stmt->gtStmtExpr; noway_assert(compTailCallUsed); noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall()); - noway_assert(stmt->gtNext == nullptr); + noway_assert(stmt->gtNextStmt == nullptr); GenTreeCall* call = morph->AsCall(); // Could either be @@ -15448,7 +15511,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa noway_assert(compTailCallUsed); noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall()); - noway_assert(stmt->gtNext == nullptr); + noway_assert(stmt->gtNextStmt == nullptr); GenTreeCall* call = morph->AsCall(); @@ -15505,7 +15568,7 @@ void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loa fgRemoveRestOfBlock = true; } - stmt->gtStmt.gtStmtExpr = tree = morph; + stmt->gtStmtExpr = tree = morph; noway_assert(fgPtrArgCntCur == 0); @@ -15958,6 +16021,45 @@ void Compiler::fgMorphBlocks() #endif } +//------------------------------------------------------------------------ +// fgCheckArgCnt: Check whether the maximum arg size will change codegen requirements +// +// Notes: +// fpPtrArgCntMax records the maximum number of pushed arguments. +// Depending upon this value of the maximum number of pushed arguments +// we may need to use an EBP frame or be partially interuptible. +// This functionality has been factored out of fgSetOptions() because +// the Rationalizer can create new calls. +// +// Assumptions: +// This must be called before isFramePointerRequired() is called, because it is a +// phased variable (can only be written before it has been read). +// +void Compiler::fgCheckArgCnt() +{ + if (!compCanEncodePtrArgCntMax()) + { +#ifdef DEBUG + if (verbose) + { + printf("Too many pushed arguments for fully interruptible encoding, marking method as partially " + "interruptible\n"); + } +#endif + genInterruptible = false; + } + if (fgPtrArgCntMax >= sizeof(unsigned)) + { +#ifdef DEBUG + if (verbose) + { + printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n"); + } +#endif + codeGen->setFramePointerRequired(true); + } +} + /***************************************************************************** * * Make some decisions about the kind of code to generate. @@ -15974,13 +16076,11 @@ void Compiler::fgSetOptions() } #endif -#ifdef DEBUGGING_SUPPORT if (opts.compDbgCode) { assert(!codeGen->isGCTypeFixed()); genInterruptible = true; // debugging is easier this way ... 
} -#endif /* Assume we won't need an explicit stack frame if this is allowed */ @@ -16035,32 +16135,7 @@ void Compiler::fgSetOptions() #endif // _TARGET_X86_ - // fpPtrArgCntMax records the maximum number of pushed arguments - // Depending upon this value of the maximum number of pushed arguments - // we may need to use an EBP frame or be partially interuptible - // - - if (!compCanEncodePtrArgCntMax()) - { -#ifdef DEBUG - if (verbose) - { - printf("Too many pushed arguments for fully interruptible encoding, marking method as partially " - "interruptible\n"); - } -#endif - genInterruptible = false; - } - if (fgPtrArgCntMax >= sizeof(unsigned)) - { -#ifdef DEBUG - if (verbose) - { - printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n"); - } -#endif - codeGen->setFramePointerRequiredGCInfo(true); - } + fgCheckArgCnt(); if (info.compCallUnmanaged) { @@ -16121,6 +16196,23 @@ GenTreePtr Compiler::fgInitThisClass() } else { +#ifdef FEATURE_READYTORUN_COMPILER + // Only CoreRT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR. + if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_CORERT_ABI)) + { + CORINFO_RESOLVED_TOKEN resolvedToken; + memset(&resolvedToken, 0, sizeof(resolvedToken)); + + GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind); + + // CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static + // base of the class that owns the method being compiled". If we're in this method, it means we're not + // inlining and there's no ambiguity. + return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF, + gtNewArgList(ctxTree), &kind); + } +#endif + // Collectible types requires that for shared generic code, if we use the generic context paramter // that we report it. (This is a conservative approach, we could detect some cases particularly when the // context parameter is this that we don't need the eager reporting logic.) @@ -16774,19 +16866,13 @@ void Compiler::fgMorph() fgRemoveEmptyBlocks(); - /* Add any internal blocks/trees we may need */ - - fgAddInternal(); - -#if OPT_BOOL_OPS - fgMultipleNots = false; -#endif - #ifdef DEBUG /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */ fgDebugCheckBBlist(false, false); #endif // DEBUG + EndPhase(PHASE_MORPH_INIT); + /* Inline */ fgInline(); #if 0 @@ -16796,6 +16882,16 @@ void Compiler::fgMorph() RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time. + EndPhase(PHASE_MORPH_INLINE); + + /* Add any internal blocks/trees we may need */ + + fgAddInternal(); + +#if OPT_BOOL_OPS + fgMultipleNots = false; +#endif + #ifdef DEBUG /* Inliner could add basic blocks. 
Check that the flowgraph data is up-to-date */ fgDebugCheckBBlist(false, false); @@ -16804,6 +16900,8 @@ void Compiler::fgMorph() /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */ fgMarkImplicitByRefArgs(); + EndPhase(PHASE_MORPH_IMPBYREF); + /* Promote struct locals if necessary */ fgPromoteStructs(); @@ -16816,10 +16914,14 @@ void Compiler::fgMorph() fgStress64RsltMul(); #endif // DEBUG + EndPhase(PHASE_STR_ADRLCL); + /* Morph the trees in all the blocks of the method */ fgMorphBlocks(); + EndPhase(PHASE_MORPH_GLOBAL); + #if 0 JITDUMP("trees after fgMorphBlocks\n"); DBEXEC(VERBOSE, fgDispBasicBlocks(true)); @@ -17454,9 +17556,6 @@ enum AddrExposedContext AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates // on more bytes than the width of the storage location addressed. If this is a // field of a promoted struct local, declare the entire struct local address-taken. - AXC_InitBlk, // An GT_INITBLK is the immediate parent. The first argument is in an IND context. - AXC_CopyBlk, // An GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose - // args should be evaluated in an IND context. AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND contxt. // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none. }; @@ -17572,14 +17671,8 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, f return WALK_CONTINUE; case GT_LIST: - if (axc == AXC_InitBlk || axc == AXC_CopyBlk) - { - axcStack->Push(axc); - } - else - { - axcStack->Push(AXC_None); - } + case GT_FIELD_LIST: + axcStack->Push(AXC_None); return WALK_CONTINUE; case GT_INDEX: @@ -18083,9 +18176,6 @@ bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse) #endif // defined(LEGACY_BACKEND) } -// Static variables. -Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock); - #ifdef FEATURE_SIMD //----------------------------------------------------------------------------------- diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h index a73033a91f..1937cc4377 100644 --- a/src/jit/nodeinfo.h +++ b/src/jit/nodeinfo.h @@ -21,17 +21,18 @@ public: _internalIntCount = 0; _internalFloatCount = 0; - srcCandsIndex = 0; - dstCandsIndex = 0; - internalCandsIndex = 0; - isLocalDefUse = false; - isHelperCallWithKills = false; - isLsraAdded = false; - isDelayFree = false; - hasDelayFreeSrc = false; - isTgtPref = false; - regOptional = false; - definesAnyRegisters = false; + srcCandsIndex = 0; + dstCandsIndex = 0; + internalCandsIndex = 0; + isLocalDefUse = false; + isHelperCallWithKills = false; + isLsraAdded = false; + isDelayFree = false; + hasDelayFreeSrc = false; + isTgtPref = false; + regOptional = false; + definesAnyRegisters = false; + isInternalRegDelayFree = false; #ifdef DEBUG isInitialized = false; #endif @@ -99,42 +100,54 @@ public: LsraLocation loc; -private: - unsigned char _dstCount; - unsigned char _srcCount; - unsigned char _internalIntCount; - unsigned char _internalFloatCount; - public: unsigned char srcCandsIndex; unsigned char dstCandsIndex; unsigned char internalCandsIndex; +private: + unsigned char _srcCount : 5; + unsigned char _dstCount : 3; + unsigned char _internalIntCount : 3; + unsigned char _internalFloatCount : 3; + +public: // isLocalDefUse identifies trees that produce a value that is not consumed elsewhere. 
// Examples include stack arguments to a call (they are immediately stored), lhs of comma // nodes, or top-level nodes that are non-void. unsigned char isLocalDefUse : 1; + // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs. unsigned char isHelperCallWithKills : 1; + // Is this node added by LSRA, e.g. as a resolution or copy/reload move. unsigned char isLsraAdded : 1; + // isDelayFree is set when the register defined by this node will interfere with the destination // of the consuming node, and therefore it must not be freed immediately after use. unsigned char isDelayFree : 1; + // hasDelayFreeSrc is set when this node has sources that are marked "isDelayFree". This is because, // we may eventually "contain" this node, in which case we don't want it's children (which have // already been marked "isDelayFree" to be handled that way when allocating. unsigned char hasDelayFreeSrc : 1; + // isTgtPref is set to true when we have a rmw op, where we would like the result to be allocated // in the same register as op1. unsigned char isTgtPref : 1; + // Whether a spilled second src can be treated as a contained operand unsigned char regOptional : 1; + // Whether or not a node defines any registers, whether directly (for nodes where dstCout is non-zero) // or indirectly (for contained nodes, which propagate the transitive closure of the registers // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work. unsigned char definesAnyRegisters : 1; + // Whether internal register needs to be different from targetReg + // in which result is produced. + unsigned char isInternalRegDelayFree : 1; + #ifdef DEBUG // isInitialized is set when the tree node is handled. unsigned char isInitialized : 1; diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index d23b4cd198..3ff4cea385 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -301,15 +301,15 @@ Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData if (IS_CSE_INDEX(tree->gtCSEnum)) { - unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum); - EXPSET_TP cseBit = genCSEnum2bit(cseIndex); + unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum); + unsigned cseBit = genCSEnum2bit(cseIndex); if (IS_CSE_DEF(tree->gtCSEnum)) { - pUserData->CSE_defMask |= cseBit; + BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_defMask, cseBit); } else { - pUserData->CSE_useMask |= cseBit; + BitVecOps::AddElemD(comp->cseTraits, pUserData->CSE_useMask, cseBit); } } @@ -321,8 +321,8 @@ Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData // void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData) { - pMaskData->CSE_defMask = 0; - pMaskData->CSE_useMask = 0; + pMaskData->CSE_defMask = BitVecOps::MakeCopy(cseTraits, cseEmpty); + pMaskData->CSE_useMask = BitVecOps::MakeCopy(cseTraits, cseEmpty); fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData); } @@ -355,14 +355,14 @@ bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2) optCSE_GetMaskData(op2, &op2MaskData); // We cannot swap if op1 contains a CSE def that is used by op2 - if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0) + if (!BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask)) { canSwap = false; } else { // We also cannot swap if op2 contains a CSE def that is used by op1. 
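Both hunks in optCSE_canSwap keep the same legality rule and only change how the def/use sets are stored: CSE_defMask/CSE_useMask are now BitVecs over cseTraits rather than raw EXPSET_TP words. Condensed side by side (names as used in this function; the real code keeps the two checks separate):

    // old representation: integer masks
    canSwap = ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) == 0) &&
              ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) == 0);

    // new representation: the same test through BitVecOps
    canSwap = BitVecOps::IsEmptyIntersection(cseTraits, op1MaskData.CSE_defMask, op2MaskData.CSE_useMask) &&
              BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask);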
- if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0) + if (!BitVecOps::IsEmptyIntersection(cseTraits, op2MaskData.CSE_defMask, op1MaskData.CSE_useMask)) { canSwap = false; } @@ -495,6 +495,14 @@ void Compiler::optValnumCSE_Init() optCSEtab = nullptr; #endif + // Init traits and full/empty bitvectors. This will be used to track the + // individual cse indexes. + cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this); + cseFull = BitVecOps::UninitVal(); + cseEmpty = BitVecOps::UninitVal(); + BitVecOps::AssignNoCopy(cseTraits, cseFull, BitVecOps::MakeFull(cseTraits)); + BitVecOps::AssignNoCopy(cseTraits, cseEmpty, BitVecOps::MakeEmpty(cseTraits)); + /* Allocate and clear the hash bucket table */ optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize](); @@ -631,8 +639,8 @@ unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt) C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT); - unsigned CSEindex = ++optCSECandidateCount; - EXPSET_TP CSEmask = genCSEnum2bit(CSEindex); + unsigned CSEindex = ++optCSECandidateCount; + // EXPSET_TP CSEmask = genCSEnum2bit(CSEindex); /* Record the new CSE index in the hashDsc */ hashDsc->csdIndex = CSEindex; @@ -649,10 +657,11 @@ unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt) #ifdef DEBUG if (verbose) { + EXPSET_TP tempMask = BitVecOps::MakeSingleton(cseTraits, genCSEnum2bit(CSEindex)); printf("\nCSE candidate #%02u, vn=", CSEindex); vnPrint(vnlib, 0); - printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(genCSEnum2bit(CSEindex)), - compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz); + printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(cseTraits, tempMask), compCurBB->bbNum, + tree->gtCostEx, tree->gtCostSz); gtDispTree(tree); } #endif // DEBUG @@ -773,19 +782,18 @@ void Compiler::optValnumCSE_InitDataFlow() if (init_to_zero) { /* Initialize to {ZERO} prior to dataflow */ - - block->bbCseIn = 0; + block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseEmpty); } else { /* Initialize to {ALL} prior to dataflow */ - - block->bbCseIn = EXPSET_ALL; + block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseFull); } - block->bbCseOut = EXPSET_ALL; + + block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull); /* Initialize to {ZERO} prior to locating the CSE candidates */ - block->bbCseGen = 0; + block->bbCseGen = BitVecOps::MakeCopy(cseTraits, cseEmpty); } // We walk the set of CSE candidates and set the bit corresponsing to the CSEindex @@ -801,7 +809,7 @@ void Compiler::optValnumCSE_InitDataFlow() while (lst != nullptr) { BasicBlock* block = lst->tslBlock; - block->bbCseGen |= genCSEnum2bit(CSEindex); + BitVecOps::AddElemD(cseTraits, block->bbCseGen, genCSEnum2bit(CSEindex)); lst = lst->tslNext; } } @@ -814,7 +822,7 @@ void Compiler::optValnumCSE_InitDataFlow() bool headerPrinted = false; for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - if (block->bbCseGen != 0) + if (block->bbCseGen != nullptr) { if (!headerPrinted) { @@ -822,7 +830,7 @@ void Compiler::optValnumCSE_InitDataFlow() headerPrinted = true; } printf("BB%02u", block->bbNum); - printf(" cseGen = %s\n", genES2str(block->bbCseGen)); + printf(" cseGen = %s\n", genES2str(cseTraits, block->bbCseGen)); } } } @@ -857,21 +865,24 @@ public: // At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.) 
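The CSE_DataFlow callbacks converted just below implement a standard forward availability analysis; stripped of the BitVecOps plumbing, the equations are, for each block B with predecessors P:

    // bbCseIn(B)  = intersection over all P of bbCseOut(P)        (Merge)
    // bbCseOut(B) = bbCseOut(B) & (bbCseIn(B) | bbCseGen(B))      (EndMerge)

bbCseGen holds the candidates evaluated in the block, and optValnumCSE_InitDataFlow (above) seeds bbCseIn/bbCseOut before iterating to a fixed point. The change keeps these equations as-is; the sets simply move from single EXPSET_TP bitmask words to BitVecs manipulated through cseTraits.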
void StartMerge(BasicBlock* block) { - m_preMergeOut = block->bbCseOut; + m_preMergeOut = BitVecOps::MakeCopy(m_pCompiler->cseTraits, block->bbCseOut); } // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags. void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds) { - block->bbCseIn &= predBlock->bbCseOut; + BitVecOps::IntersectionD(m_pCompiler->cseTraits, block->bbCseIn, predBlock->bbCseOut); } // At the end of the merge store results of the dataflow equations, in a postmerge state. bool EndMerge(BasicBlock* block) { - EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen); - block->bbCseOut = mergeOut; - return (mergeOut != m_preMergeOut); + BitVecTraits* traits = m_pCompiler->cseTraits; + EXPSET_TP mergeOut = BitVecOps::MakeCopy(traits, block->bbCseIn); + BitVecOps::UnionD(traits, mergeOut, block->bbCseGen); + BitVecOps::IntersectionD(traits, mergeOut, block->bbCseOut); + BitVecOps::Assign(traits, block->bbCseOut, mergeOut); + return (!BitVecOps::Equal(traits, mergeOut, m_preMergeOut)); } }; @@ -905,8 +916,8 @@ void Compiler::optValnumCSE_DataFlow() for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { printf("BB%02u", block->bbNum); - printf(" cseIn = %s", genES2str(block->bbCseIn)); - printf(" cseOut = %s", genES2str(block->bbCseOut)); + printf(" cseIn = %s", genES2str(cseTraits, block->bbCseIn)); + printf(" cseOut = %s", genES2str(cseTraits, block->bbCseOut)); printf("\n"); } @@ -946,7 +957,7 @@ void Compiler::optValnumCSE_Availablity() compCurBB = block; - EXPSET_TP available_cses = block->bbCseIn; + EXPSET_TP available_cses = BitVecOps::MakeCopy(cseTraits, block->bbCseIn); optCSEweight = block->getBBWeight(this); @@ -961,13 +972,13 @@ void Compiler::optValnumCSE_Availablity() { if (IS_CSE_INDEX(tree->gtCSEnum)) { - EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum); - CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum); - unsigned stmw = block->getBBWeight(this); + unsigned int cseBit = genCSEnum2bit(tree->gtCSEnum); + CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum); + unsigned stmw = block->getBBWeight(this); /* Is this expression available here? 
*/ - if (available_cses & mask) + if (BitVecOps::IsMember(cseTraits, available_cses, cseBit)) { /* This is a CSE use */ @@ -993,8 +1004,7 @@ void Compiler::optValnumCSE_Availablity() tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum); /* This CSE will be available after this def */ - - available_cses |= mask; + BitVecOps::AddElemD(cseTraits, available_cses, cseBit); } #ifdef DEBUG if (verbose && IS_CSE_INDEX(tree->gtCSEnum)) @@ -1236,6 +1246,7 @@ public: { printf("\nSorted CSE candidates:\n"); /* Print out the CSE candidates */ + EXPSET_TP tempMask; for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++) { Compiler::CSEdsc* dsc = sortTab[cnt]; @@ -1255,8 +1266,9 @@ public: use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs) } + tempMask = BitVecOps::MakeSingleton(m_pCompiler->cseTraits, genCSEnum2bit(dsc->csdIndex)); printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex, - genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use); + genES2str(m_pCompiler->cseTraits, tempMask), dsc->csdUseCount, def, use); printf("] :: "); m_pCompiler->gtDispTree(expr, nullptr, nullptr, true); } @@ -2038,7 +2050,7 @@ public: assert(m_pCompiler->fgRemoveRestOfBlock == false); /* re-morph the statement */ - m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE")); + m_pCompiler->fgMorphBlockStmt(blk, stm->AsStmt() DEBUGARG("optValnumCSE")); } while (lst != nullptr); } @@ -2516,8 +2528,6 @@ void Compiler::optCleanupCSEs() // for (BasicBlock* block = fgFirstBB; block; block = block->bbNext) { - unsigned blkFlags = block->bbFlags; - // And clear all the "visited" bits on the block // block->bbFlags &= ~(BBF_VISITED | BBF_MARKED); diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 0fbdb27770..bd82f6a6f3 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -822,6 +822,10 @@ bool Compiler::optCheckIterInLoopTest( if (limitOp->gtOper == GT_CNS_INT) { optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT; + if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0) + { + optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT; + } } else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum)) { @@ -1081,9 +1085,24 @@ bool Compiler::optExtractInitTestIncr( // If it is a duplicated loop condition, skip it. if (init->gtFlags & GTF_STMT_CMPADD) { - // Must be a duplicated loop condition. - noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); - init = init->gtPrev; + bool doGetPrev = true; +#ifdef DEBUG + if (opts.optRepeat) + { + // Previous optimization passes may have inserted compiler-generated + // statements other than duplicated loop conditions. + doGetPrev = (init->gtPrev != nullptr); + } + else + { + // Must be a duplicated loop condition. + noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); + } +#endif // DEBUG + if (doGetPrev) + { + init = init->gtPrev; + } noway_assert(init != nullptr); } @@ -1217,10 +1236,14 @@ void Compiler::optRecordLoop(BasicBlock* head, } // Make sure the "iterVar" initialization is never skipped, - // i.e. HEAD dominates the ENTRY. - if (!fgDominate(head, entry)) + // i.e. every pred of ENTRY other than HEAD is in the loop. 
+ for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext) { - goto DONE_LOOP; + BasicBlock* predBlock = predEdge->flBlock; + if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock)) + { + goto DONE_LOOP; + } } if (!optPopulateInitInfo(loopInd, init, iterVar)) @@ -2798,11 +2821,6 @@ void Compiler::optUnrollLoops() } #endif - if (optCanCloneLoops()) - { - return; - } - #ifdef DEBUG if (verbose) { @@ -2811,276 +2829,266 @@ void Compiler::optUnrollLoops() #endif /* Look for loop unrolling candidates */ - /* Double loop so that after unrolling an inner loop we set change to true - * and we then go back over all of the loop candidates and try to unroll - * the next outer loop, until we don't unroll any loops, - * then change will be false and we are done. - */ - for (;;) - { - bool change = false; + bool change = false; + + // Visit loops from highest to lowest number to vist them in innermost + // to outermost order + for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) + { + BasicBlock* block; + BasicBlock* head; + BasicBlock* bottom; + + GenTree* loop; + GenTree* test; + GenTree* incr; + GenTree* phdr; + GenTree* init; + + bool dupCond; + int lval; + int lbeg; // initial value for iterator + int llim; // limit value for iterator + unsigned lvar; // iterator lclVar # + int iterInc; // value to increment the iterator + genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) + var_types iterOperType; // type result of the oper (for overflow instrs) + genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) + bool unsTest; // Is the comparison u/int + + unsigned loopRetCount; // number of BBJ_RETURN blocks in loop + unsigned totalIter; // total number of iterations in the constant loop + unsigned loopFlags; // actual lpFlags + unsigned requiredFlags; // required lpFlags + + static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { + 10, // BLENDED_CODE + 0, // SMALL_CODE + 20, // FAST_CODE + 0 // COUNT_OPT_CODE + }; + + noway_assert(ITER_LIMIT[SMALL_CODE] == 0); + noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); + + unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; - for (unsigned lnum = 0; lnum < optLoopCount; lnum++) +#ifdef DEBUG + if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { - BasicBlock* block; - BasicBlock* head; - BasicBlock* bottom; - - GenTree* loop; - GenTree* test; - GenTree* incr; - GenTree* phdr; - GenTree* init; - - bool dupCond; - int lval; - int lbeg; // initial value for iterator - int llim; // limit value for iterator - unsigned lvar; // iterator lclVar # - int iterInc; // value to increment the iterator - genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.) - var_types iterOperType; // type result of the oper (for overflow instrs) - genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) 
- bool unsTest; // Is the comparison u/int - - unsigned totalIter; // total number of iterations in the constant loop - unsigned loopCostSz; // Cost is size of one iteration - unsigned loopFlags; // actual lpFlags - unsigned requiredFlags; // required lpFlags + iterLimit *= 10; + } +#endif - GenTree* loopList; // new stmt list of the unrolled loop - GenTree* loopLast; + static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { + 300, // BLENDED_CODE + 0, // SMALL_CODE + 600, // FAST_CODE + 0 // COUNT_OPT_CODE + }; - static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { - 10, // BLENDED_CODE - 0, // SMALL_CODE - 20, // FAST_CODE - 0 // COUNT_OPT_CODE - }; + noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); + noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); - noway_assert(ITER_LIMIT[SMALL_CODE] == 0); - noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); + int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; - unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; + loopFlags = optLoopTable[lnum].lpFlags; + // Check for required flags: + // LPFLG_DO_WHILE - required because this transform only handles loops of this form + // LPFLG_CONST - required because this transform only handles full unrolls + // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons + requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT; #ifdef DEBUG - if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) - { - iterLimit *= 10; - } -#endif - - static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { - 30, // BLENDED_CODE - 0, // SMALL_CODE - 60, // FAST_CODE - 0 // COUNT_OPT_CODE - }; - - noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); - noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); - - int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; + if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) + { + // In stress mode, quadruple the size limit, and drop + // the restriction that loop limit must be Vector<T>.Count. -#ifdef DEBUG - if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) - { - unrollLimitSz *= 10; - } + unrollLimitSz *= 4; + requiredFlags &= ~LPFLG_SIMD_LIMIT; + } #endif - loopFlags = optLoopTable[lnum].lpFlags; - requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST; + /* Ignore the loop if we don't have a do-while + that has a constant number of iterations */ - /* Ignore the loop if we don't have a do-while with a single exit - that has a constant number of iterations */ - - if ((loopFlags & requiredFlags) != requiredFlags) - { - continue; - } + if ((loopFlags & requiredFlags) != requiredFlags) + { + continue; + } - /* ignore if removed or marked as not unrollable */ + /* ignore if removed or marked as not unrollable */ - if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) - { - continue; - } + if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) + { + continue; + } - head = optLoopTable[lnum].lpHead; - noway_assert(head); - bottom = optLoopTable[lnum].lpBottom; - noway_assert(bottom); + head = optLoopTable[lnum].lpHead; + noway_assert(head); + bottom = optLoopTable[lnum].lpBottom; + noway_assert(bottom); - /* The single exit must be at the bottom of the loop */ - noway_assert(optLoopTable[lnum].lpExit); - if (optLoopTable[lnum].lpExit != bottom) - { - continue; - } + /* Get the loop data: + - initial constant + - limit constant + - iterator + - iterator increment + - increment operation type (i.e. ADD, SUB, etc...) + - loop test type (i.e. GT_GE, GT_LT, etc...) 
+ */ - /* Unrolling loops with jumps in them is not worth the headache - * Later we might consider unrolling loops after un-switching */ + lbeg = optLoopTable[lnum].lpConstInit; + llim = optLoopTable[lnum].lpConstLimit(); + testOper = optLoopTable[lnum].lpTestOper(); - block = head; - do - { - block = block->bbNext; - noway_assert(block); + lvar = optLoopTable[lnum].lpIterVar(); + iterInc = optLoopTable[lnum].lpIterConst(); + iterOper = optLoopTable[lnum].lpIterOper(); - if (block->bbJumpKind != BBJ_NONE) - { - if (block != bottom) - { - goto DONE_LOOP; - } - } - } while (block != bottom); + iterOperType = optLoopTable[lnum].lpIterOperType(); + unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; - /* Get the loop data: - - initial constant - - limit constant - - iterator - - iterator increment - - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...) - - loop test type (i.e. GT_GE, GT_LT, etc...) - */ + if (lvaTable[lvar].lvAddrExposed) + { // If the loop iteration variable is address-exposed then bail + continue; + } + if (lvaTable[lvar].lvIsStructField) + { // If the loop iteration variable is a promoted field from a struct then + // bail + continue; + } - lbeg = optLoopTable[lnum].lpConstInit; - llim = optLoopTable[lnum].lpConstLimit(); - testOper = optLoopTable[lnum].lpTestOper(); + /* Locate the pre-header and initialization and increment/test statements */ - lvar = optLoopTable[lnum].lpIterVar(); - iterInc = optLoopTable[lnum].lpIterConst(); - iterOper = optLoopTable[lnum].lpIterOper(); + phdr = head->bbTreeList; + noway_assert(phdr); + loop = bottom->bbTreeList; + noway_assert(loop); - iterOperType = optLoopTable[lnum].lpIterOperType(); - unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; + init = head->lastStmt(); + noway_assert(init && (init->gtNext == nullptr)); + test = bottom->lastStmt(); + noway_assert(test && (test->gtNext == nullptr)); + incr = test->gtPrev; + noway_assert(incr); - if (lvaTable[lvar].lvAddrExposed) - { // If the loop iteration variable is address-exposed then bail - continue; - } - if (lvaTable[lvar].lvIsStructField) - { // If the loop iteration variable is a promoted field from a struct then - // bail - continue; - } + if (init->gtFlags & GTF_STMT_CMPADD) + { + /* Must be a duplicated loop condition */ + noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); - /* Locate the pre-header and initialization and increment/test statements */ + dupCond = true; + init = init->gtPrev; + noway_assert(init); + } + else + { + dupCond = false; + } - phdr = head->bbTreeList; - noway_assert(phdr); - loop = bottom->bbTreeList; - noway_assert(loop); + /* Find the number of iterations - the function returns false if not a constant number */ - init = head->lastStmt(); - noway_assert(init && (init->gtNext == nullptr)); - test = bottom->lastStmt(); - noway_assert(test && (test->gtNext == nullptr)); - incr = test->gtPrev; - noway_assert(incr); + if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) + { + continue; + } - if (init->gtFlags & GTF_STMT_CMPADD) - { - /* Must be a duplicated loop condition */ - noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); + /* Forget it if there are too many repetitions or not a constant loop */ - dupCond = true; - init = init->gtPrev; - noway_assert(init); - } - else - { - dupCond = false; - } + if (totalIter > iterLimit) + { + continue; + } - /* Find the number of iterations - the function returns false if not a constant number */ + 
noway_assert(init->gtOper == GT_STMT); + init = init->gtStmt.gtStmtExpr; + noway_assert(test->gtOper == GT_STMT); + test = test->gtStmt.gtStmtExpr; + noway_assert(incr->gtOper == GT_STMT); + incr = incr->gtStmt.gtStmtExpr; - if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) - { - continue; - } + // Don't unroll loops we don't understand. + if (incr->gtOper != GT_ASG) + { + continue; + } + incr = incr->gtOp.gtOp2; - /* Forget it if there are too many repetitions or not a constant loop */ + /* Make sure everything looks ok */ + if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || + (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || + (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || - if (totalIter > iterLimit) - { - continue; - } + !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || + (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || + (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || - noway_assert(init->gtOper == GT_STMT); - init = init->gtStmt.gtStmtExpr; - noway_assert(test->gtOper == GT_STMT); - test = test->gtStmt.gtStmtExpr; - noway_assert(incr->gtOper == GT_STMT); - incr = incr->gtStmt.gtStmtExpr; + (test->gtOper != GT_JTRUE)) + { + noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); + continue; + } - // Don't unroll loops we don't understand. - if (incr->gtOper == GT_ASG) - { - continue; - } + /* heuristic - Estimated cost in code size of the unrolled loop */ - /* Make sure everything looks ok */ - if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || - (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || - (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || + { + ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration - !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) || - (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || - (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || + block = head->bbNext; + auto tryIndex = block->bbTryIndex; - (test->gtOper != GT_JTRUE)) + loopRetCount = 0; + for (;; block = block->bbNext) { - noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); - continue; - } - - /* heuristic - Estimated cost in code size of the unrolled loop */ - - loopCostSz = 0; - - block = head; + if (block->bbTryIndex != tryIndex) + { + // Unrolling would require cloning EH regions + goto DONE_LOOP; + } - do - { - block = block->bbNext; + if (block->bbJumpKind == BBJ_RETURN) + { + ++loopRetCount; + } /* Visit all the statements in the block */ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { - /* Get the expression and stop if end reached */ - - GenTreePtr expr = stmt->gtStmtExpr; - if (expr == incr) - { - break; - } - /* Calculate gtCostSz */ gtSetStmtInfo(stmt); /* Update loopCostSz */ loopCostSz += stmt->gtCostSz; } - } while (block != bottom); + + if (block == bottom) + { + break; + } + } + +#ifdef JIT32_GCENCODER + if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX) + { + // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs. 
+ goto DONE_LOOP; + } +#endif // !JIT32_GCENCODER /* Compute the estimated increase in code size for the unrolled loop */ - unsigned int fixedLoopCostSz; - fixedLoopCostSz = 8; + ClrSafeInt<unsigned> fixedLoopCostSz(8); - int unrollCostSz; - unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz); + ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) - + ClrSafeInt<int>(loopCostSz + fixedLoopCostSz); /* Don't unroll if too much code duplication would result. */ - if (unrollCostSz > unrollLimitSz) + if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz)) { - /* prevent this loop from being revisited */ - optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; goto DONE_LOOP; } @@ -3100,76 +3108,81 @@ void Compiler::optUnrollLoops() printf("\n"); } #endif + } - /* Create the unrolled loop statement list */ - - loopList = loopLast = nullptr; + /* Create the unrolled loop statement list */ + { + BlockToBlockMap blockMap(getAllocator()); + BasicBlock* insertAfter = bottom; for (lval = lbeg; totalIter; totalIter--) { - block = head; - - do + for (block = head->bbNext;; block = block->bbNext) { - GenTreeStmt* stmt; - GenTree* expr; - - block = block->bbNext; - noway_assert(block); + BasicBlock* newBlock = insertAfter = + fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true); + blockMap.Set(block, newBlock); - /* Visit all the statements in the block */ - - for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval)) { - /* Stop if we've reached the end of the loop */ - - if (stmt->gtStmtExpr == incr) - { - break; - } - - /* Clone/substitute the expression */ - - expr = gtCloneExpr(stmt, 0, lvar, lval); - // cloneExpr doesn't handle everything + BasicBlock* oldBottomNext = insertAfter->bbNext; + bottom->bbNext = oldBottomNext; + oldBottomNext->bbPrev = bottom; + optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; + goto DONE_LOOP; + } + // Block weight should no longer have the loop multiplier + newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT); + // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them. + assert(newBlock->bbJumpDest == nullptr); - if (!expr) - { - optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; - goto DONE_LOOP; - } - - /* Append the expression to our list */ - - if (loopList) + if (block == bottom) + { + // Remove the test; we're doing a full unroll. + + GenTreeStmt* testCopyStmt = newBlock->lastStmt(); + GenTreePtr testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr; + assert(testCopyExpr->gtOper == GT_JTRUE); + GenTreePtr sideEffList = nullptr; + gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF); + if (sideEffList == nullptr) { - loopLast->gtNext = expr; + fgRemoveStmt(newBlock, testCopyStmt); } else { - loopList = expr; + testCopyStmt->gtStmt.gtStmtExpr = sideEffList; } + newBlock->bbJumpKind = BBJ_NONE; - expr->gtPrev = loopLast; - loopLast = expr; + // Exit this loop; we've walked all the blocks. 
+ break; } - } while (block != bottom); + } + + // Now redirect any branches within the newly-cloned iteration + for (block = head->bbNext; block != bottom; block = block->bbNext) + { + BasicBlock* newBlock = blockMap[block]; + optCopyBlkDest(block, newBlock); + optRedirectBlock(newBlock, &blockMap); + } /* update the new value for the unrolled iterator */ switch (iterOper) { - case GT_ASG_ADD: + case GT_ADD: lval += iterInc; break; - case GT_ASG_SUB: + case GT_SUB: lval -= iterInc; break; - case GT_ASG_RSH: - case GT_ASG_LSH: + case GT_RSH: + case GT_LSH: noway_assert(!"Unrolling not implemented for this loop iterator"); goto DONE_LOOP; @@ -3179,46 +3192,22 @@ void Compiler::optUnrollLoops() } } - /* Finish the linked list */ - - if (loopList) + // Gut the old loop body + for (block = head->bbNext;; block = block->bbNext) { - loopList->gtPrev = loopLast; - loopLast->gtNext = nullptr; - } - - /* Replace the body with the unrolled one */ - - block = head; - - do - { - block = block->bbNext; - noway_assert(block); block->bbTreeList = nullptr; block->bbJumpKind = BBJ_NONE; - block->bbFlags &= ~BBF_NEEDS_GCPOLL; - } while (block != bottom); - - bottom->bbJumpKind = BBJ_NONE; - bottom->bbTreeList = loopList; - bottom->bbFlags &= ~BBF_NEEDS_GCPOLL; - bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT); - - bool dummy; - - fgMorphStmts(bottom, &dummy, &dummy, &dummy); - - /* Update bbRefs and bbPreds */ - /* Here head->bbNext is bottom !!! - Replace it */ - - fgRemoveRefPred(head->bbNext, bottom); - - /* Now change the initialization statement in the HEAD to "lvar = lval;" - * (the last value of the iterator in the loop) - * and drop the jump condition since the unrolled loop will always execute */ + block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD); + if (block->bbJumpDest != nullptr) + { + block->bbJumpDest = nullptr; + } - init->gtOp.gtOp2->gtIntCon.gtIconVal = lval; + if (block == bottom) + { + break; + } + } /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */ @@ -3240,10 +3229,6 @@ void Compiler::optUnrollLoops() phdr->gtPrev = init; head->bbJumpKind = BBJ_NONE; head->bbFlags &= ~BBF_NEEDS_GCPOLL; - - /* Update bbRefs and bbPreds */ - - fgRemoveRefPred(head->bbJumpDest, head); } else { @@ -3256,18 +3241,9 @@ void Compiler::optUnrollLoops() { printf("Whole unrolled loop:\n"); - GenTreePtr s = loopList; - - while (s) - { - noway_assert(s->gtOper == GT_STMT); - gtDispTree(s); - s = s->gtNext; - } - printf("\n"); - gtDispTree(init); printf("\n"); + fgDumpTrees(head->bbNext, insertAfter); } #endif @@ -3278,22 +3254,25 @@ void Compiler::optUnrollLoops() /* Make sure to update loop table */ /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly - * (also make head and bottom NULL - to hit an assert or GPF) */ + * (also make head and bottom NULL - to hit an assert or GPF) */ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; - DONE_LOOP:; + // Note if we created new BBJ_RETURNs + fgReturnCount += loopRetCount * (totalIter - 1); } - if (!change) - { - break; - } + DONE_LOOP:; + } + + if (change) + { + fgUpdateChangedFlowGraph(); } #ifdef DEBUG - fgDebugCheckBBlist(); + fgDebugCheckBBlist(true); #endif } #ifdef _PREFAST_ @@ -3639,12 +3618,10 @@ void Compiler::fgOptWhileLoop(BasicBlock* block) copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD; -#ifdef DEBUGGING_SUPPORT if (opts.compDbgInfo) { copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx; } -#endif // Flag the block that 
received the copy as potentially having an array/vtable // reference if the block copied from did; this is a conservative guess. @@ -4265,7 +4242,7 @@ void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID); GenTreePtr stmt = fgNewStmtFromTree(logCall); fgInsertStmtBefore(block, insertBefore, stmt); - fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning")); + fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning")); } #endif @@ -4394,14 +4371,18 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) } // We've previously made a decision whether to have separate return epilogs, or branch to one. - // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs. - // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a - // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.) - if (fgReturnCount + loopRetCount > 4) + // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate + // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited + // (or return blocks should not be considered part of the loop, rendering this issue moot). + unsigned epilogLimit = 4; +#ifdef JIT32_GCENCODER + epilogLimit = SET_EPILOGCNT_MAX; +#endif // JIT32_GCENCODER + if (fgReturnCount + loopRetCount > epilogLimit) { JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, " - "would exceed the limit of 4.\n", - loopRetCount, fgReturnCount); + "would exceed the limit of %d.\n", + loopRetCount, fgReturnCount, epilogLimit); return false; } @@ -4642,7 +4623,11 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, /*extendRegion*/ true); - BasicBlock::CloneBlockState(this, newBlk, blk); + // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it + // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already + // checked them to guarantee they are clonable. + bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk); + noway_assert(cloneOk); // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding // loop, if one exists -- the parent of the loop we're cloning. @@ -4716,6 +4701,12 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) } assert(foundIt && e2 != nullptr); + // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to + // initialize the loop counter immediately before entering the loop, but we've left a shared + // initialization of the loop counter up above the test that determines which version of the + // loop to take. + optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL; + fgUpdateChangedFlowGraph(); } @@ -6226,9 +6217,28 @@ bool Compiler::optHoistLoopExprsForTree( // be hoisted so that they are evaluated in the same order as they would have been in the loop, // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS // here, since that includes exceptions.) 
- if (tree->gtFlags & GTF_CALL) + if (tree->IsCall()) { - *pFirstBlockAndBeforeSideEffect = false; + // If it's a call, it must be a helper call that does not mutate the heap. + // Further, if it may run a cctor, it must be labeled as "Hoistable" + // (meaning it won't run a cctor because the class is not precise-init). + GenTreeCall* call = tree->AsCall(); + if (call->gtCallType != CT_HELPER) + { + *pFirstBlockAndBeforeSideEffect = false; + } + else + { + CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd); + if (s_helperCallProperties.MutatesHeap(helpFunc)) + { + *pFirstBlockAndBeforeSideEffect = false; + } + else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0) + { + *pFirstBlockAndBeforeSideEffect = false; + } + } } else if (tree->OperIsAssignment()) { @@ -6748,15 +6758,17 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum) { - unsigned lnum = blk->bbNatLoopNum; - while (lnum != BasicBlock::NOT_IN_LOOP) + for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent) { + if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED) + { + continue; + } if (optLoopTable[lnum].lpEntry == blk) { *pLnum = lnum; return true; } - lnum = optLoopTable[lnum].lpParent; } return false; } @@ -7239,7 +7251,7 @@ void Compiler::optRemoveRangeCheck( noway_assert(stmt->gtOper == GT_STMT); noway_assert(tree->gtOper == GT_COMMA); - noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK); + noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck()); noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1)); GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk(); diff --git a/src/jit/protojit/CMakeLists.txt b/src/jit/protojit/CMakeLists.txt index e3cc769ba0..91c69e9a83 100644 --- a/src/jit/protojit/CMakeLists.txt +++ b/src/jit/protojit/CMakeLists.txt @@ -3,8 +3,13 @@ project(protojit) add_definitions(-DALT_JIT) add_definitions(-DFEATURE_NO_HOST) add_definitions(-DSELF_NO_HOST) +add_definitions(-DFEATURE_READYTORUN_COMPILER) remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE) +if(WIN32) + add_definitions(-DFX_VER_INTERNALNAME_STR=protojit.dll) +endif(WIN32) + add_library_clr(protojit SHARED ${SHARED_LIB_SOURCES} @@ -28,7 +33,8 @@ if(CLR_CMAKE_PLATFORM_UNIX) ) else() list(APPEND RYUJIT_LINK_LIBRARIES - msvcrt.lib + ${STATIC_MT_CRT_LIB} + ${STATIC_MT_VCRT_LIB} kernel32.lib advapi32.lib ole32.lib diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp index ae0c792f11..8d16cce31a 100644 --- a/src/jit/rangecheck.cpp +++ b/src/jit/rangecheck.cpp @@ -208,7 +208,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreeP // If we are not looking at array bounds check, bail. 
GenTreePtr tree = treeParent->gtOp.gtOp1; - if (tree->gtOper != GT_ARR_BOUNDS_CHECK) + if (!tree->OperIsBoundsCheck()) { return; } @@ -233,6 +233,9 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreeP } } else +#ifdef FEATURE_SIMD + if (tree->gtOper != GT_SIMD_CHK) +#endif // FEATURE_SIMD { arrSize = GetArrLength(arrLenVn); } diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index 03e0c9a27e..7f5a26fa1f 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -16,44 +16,6 @@ struct SplitData Rationalizer* thisPhase; }; -//------------------------------------------------------------------------------ -// isNodeCallArg - given a context (stack of parent nodes), determine if the TOS is an arg to a call -//------------------------------------------------------------------------------ - -GenTree* isNodeCallArg(ArrayStack<GenTree*>* parentStack) -{ - for (int i = 1; // 0 is current node, so start at 1 - i < parentStack->Height(); i++) - { - GenTree* node = parentStack->Index(i); - switch (node->OperGet()) - { - case GT_LIST: - case GT_ARGPLACE: - break; - case GT_NOP: - // Currently there's an issue when the rationalizer performs - // the fixup of a call argument: the case is when we remove an - // inserted NOP as a parent of a call introduced by fgMorph; - // when then the rationalizer removes it, the tree stack in the - // walk is not consistent with the node it was just deleted, so the - // solution is just to go 1 level deeper. - // TODO-Cleanup: This has to be fixed in a proper way: make the rationalizer - // correctly modify the evaluation stack when removing treenodes. - if (node->gtOp.gtOp1->gtOper == GT_CALL) - { - return node->gtOp.gtOp1; - } - break; - case GT_CALL: - return node; - default: - return nullptr; - } - } - return nullptr; -} - // return op that is the store equivalent of the given load opcode genTreeOps storeForm(genTreeOps loadForm) { @@ -109,54 +71,6 @@ void copyFlags(GenTree* dst, GenTree* src, unsigned mask) dst->gtFlags |= (src->gtFlags & mask); } -// call args have other pointers to them which must be fixed up if -// they are replaced -void Compiler::fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild) -{ - GenTree* parentCall = isNodeCallArg(parentStack); - if (!parentCall) - { - return; - } - - // we have replaced an arg, so update pointers in argtable - fgFixupArgTabEntryPtr(parentCall, oldChild, newChild); -} - -//------------------------------------------------------------------------ -// fgFixupArgTabEntryPtr: Fixup the fgArgTabEntryPtr of parentCall after -// replacing oldArg with newArg -// -// Arguments: -// parentCall - a pointer to the parent call node -// oldArg - the original argument node -// newArg - the replacement argument node -// - -void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg) -{ - assert(parentCall != nullptr); - assert(oldArg != nullptr); - assert(newArg != nullptr); - - JITDUMP("parent call was :\n"); - DISPNODE(parentCall); - - JITDUMP("old child was :\n"); - DISPNODE(oldArg); - - if (oldArg->gtFlags & GTF_LATE_ARG) - { - newArg->gtFlags |= GTF_LATE_ARG; - } - else - { - fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldArg); - assert(fp->node == oldArg); - fp->node = newArg; - } -} - // Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple // lclVar if possible. 
// @@ -191,8 +105,8 @@ void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk) return; } - // If the operand of is a GT_ADDR(GT_LCL_VAR) and LclVar is known to be of simdType, - // replace obj by GT_LCL_VAR. + // If we have GT_IND(GT_LCL_VAR_ADDR) and the GT_LCL_VAR_ADDR is TYP_BYREF/TYP_I_IMPL, + // and the var is a SIMD type, replace the expression by GT_LCL_VAR. GenTree* addr = tree->AsIndir()->Addr(); if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr)) { @@ -202,6 +116,17 @@ void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk) addr->gtType = simdType; use.ReplaceWith(comp, addr); } +#if defined(_TARGET_X86_) + // For x86, if we have GT_IND(GT_ADDR(GT_SIMD)), remove the GT_IND(GT_ADDR()), leaving just + // the GT_SIMD. + else if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_SIMD)) + { + BlockRange().Remove(tree); + BlockRange().Remove(addr); + + use.ReplaceWith(comp, addr->gtGetOp1()); + } +#endif // defined(_TARGET_X86_) else if (!keepBlk) { tree->SetOper(GT_IND); @@ -242,13 +167,32 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // Create the call node GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args); - call = comp->fgMorphArgs(call); + +#if DEBUG + CORINFO_SIG_INFO sig; + comp->eeGetMethodSig(callHnd, &sig); + assert(JITtype2varType(sig.retType) == tree->gtType); +#endif // DEBUG + + call = comp->fgMorphArgs(call); + // Determine if this call has changed any codegen requirements. + comp->fgCheckArgCnt(); + #ifdef FEATURE_READYTORUN_COMPILER call->gtCall.setEntryPoint(entryPoint); #endif // Replace "tree" with "call" - *use = call; + if (data->parentStack->Height() > 1) + { + data->parentStack->Index(1)->ReplaceOperand(use, call); + } + else + { + // If there's no parent, the tree being replaced is the root of the + // statement (and no special handling is necessary). + *use = call; + } // Rebuild the evaluation order. comp->gtSetStmtInfo(root); @@ -278,8 +222,6 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, treeNextNode->gtPrev = treeLastNode; } - comp->fgFixupIfCallArg(data->parentStack, tree, call); - // Propagate flags of "call" to its parents. 
// 0 is current node, so start at 1 for (int i = 1; i < data->parentStack->Height(); i++) @@ -510,33 +452,77 @@ void Rationalizer::RewriteAssignment(LIR::Use& use) genTreeOps locationOp = location->OperGet(); -#ifdef FEATURE_SIMD - if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp()) + if (assignment->OperIsBlkOp()) { - if (location->OperGet() == GT_LCL_VAR) +#ifdef FEATURE_SIMD + if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp()) { - var_types simdType = location->TypeGet(); - GenTree* initVal = assignment->gtOp.gtOp2; - var_types baseType = comp->getBaseTypeOfSIMDLocal(location); - if (baseType != TYP_UNKNOWN) + if (location->OperGet() == GT_LCL_VAR) { - GenTreeSIMD* simdTree = new (comp, GT_SIMD) - GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType)); - assignment->gtOp.gtOp2 = simdTree; - value = simdTree; - initVal->gtNext = simdTree; - simdTree->gtPrev = initVal; - - simdTree->gtNext = location; - location->gtPrev = simdTree; + var_types simdType = location->TypeGet(); + GenTree* initVal = assignment->gtOp.gtOp2; + var_types baseType = comp->getBaseTypeOfSIMDLocal(location); + if (baseType != TYP_UNKNOWN) + { + GenTreeSIMD* simdTree = new (comp, GT_SIMD) + GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType)); + assignment->gtOp.gtOp2 = simdTree; + value = simdTree; + initVal->gtNext = simdTree; + simdTree->gtPrev = initVal; + + simdTree->gtNext = location; + location->gtPrev = simdTree; + } } } - else +#endif // FEATURE_SIMD + if ((location->TypeGet() == TYP_STRUCT) && !assignment->IsPhiDefn() && !value->IsMultiRegCall()) { - assert(location->OperIsBlk()); + if ((location->OperGet() == GT_LCL_VAR)) + { + // We need to construct a block node for the location. + // Modify lcl to be the address form. 
+ location->SetOper(addrForm(locationOp)); + LclVarDsc* varDsc = &(comp->lvaTable[location->AsLclVarCommon()->gtLclNum]); + location->gtType = TYP_BYREF; + GenTreeBlk* storeBlk = nullptr; + unsigned int size = varDsc->lvExactSize; + + if (varDsc->lvStructGcCount != 0) + { + CORINFO_CLASS_HANDLE structHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + GenTreeObj* objNode = comp->gtNewObjNode(structHnd, location)->AsObj(); + unsigned int slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE); + + objNode->SetGCInfo(varDsc->lvGcLayout, varDsc->lvStructGcCount, slots); + objNode->ChangeOper(GT_STORE_OBJ); + objNode->SetData(value); + comp->fgMorphUnsafeBlk(objNode); + storeBlk = objNode; + } + else + { + storeBlk = new (comp, GT_STORE_BLK) GenTreeBlk(GT_STORE_BLK, TYP_STRUCT, location, value, size); + } + storeBlk->gtFlags |= (GTF_REVERSE_OPS | GTF_ASG); + storeBlk->gtFlags |= ((location->gtFlags | value->gtFlags) & GTF_ALL_EFFECT); + + GenTree* insertionPoint = location->gtNext; + BlockRange().InsertBefore(insertionPoint, storeBlk); + use.ReplaceWith(comp, storeBlk); + BlockRange().Remove(assignment); + JITDUMP("After transforming local struct assignment into a block op:\n"); + DISPTREERANGE(BlockRange(), use.Def()); + JITDUMP("\n"); + return; + } + else + { + assert(location->OperIsBlk()); + } } } -#endif // FEATURE_SIMD switch (locationOp) { @@ -605,10 +591,10 @@ void Rationalizer::RewriteAssignment(LIR::Use& use) } JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper), GenTree::NodeName(storeOper)); - storeBlk->gtOper = storeOper; + storeBlk->SetOperRaw(storeOper); storeBlk->gtFlags &= ~GTF_DONT_CSE; storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE | - GTF_BLK_UNALIGNED | GTF_BLK_INIT | GTF_DONT_CSE)); + GTF_BLK_UNALIGNED | GTF_DONT_CSE)); storeBlk->gtBlk.Data() = value; // Replace the assignment node with the store @@ -693,21 +679,20 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0; #endif - // First, remove any preceeding GT_LIST nodes, which are not otherwise visited by the tree walk. + // First, remove any preceding list nodes, which are not otherwise visited by the tree walk. // - // NOTE: GT_LIST nodes that are used as aggregates, by block ops, and by phi nodes will in fact be visited. - for (GenTree* prev = node->gtPrev; - prev != nullptr && prev->OperGet() == GT_LIST && !(prev->AsArgList()->IsAggregate()); - prev = node->gtPrev) + // NOTE: GT_FIELD_LIST head nodes, and GT_LIST nodes used by phi nodes will in fact be visited. + for (GenTree* prev = node->gtPrev; prev != nullptr && prev->OperIsAnyList() && !(prev->OperIsFieldListHead()); + prev = node->gtPrev) { BlockRange().Remove(prev); } // In addition, remove the current node if it is a GT_LIST node that is not an aggregate. - if (node->OperGet() == GT_LIST) + if (node->OperIsAnyList()) { GenTreeArgList* list = node->AsArgList(); - if (!list->IsAggregate()) + if (!list->OperIsFieldListHead()) { BlockRange().Remove(list); } @@ -741,6 +726,11 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G RewriteAddress(use); break; + case GT_IND: + // Clear the `GTF_IND_ASG_LHS` flag, which overlaps with `GTF_IND_REQ_ADDR_IN_REG`. + node->gtFlags &= ~GTF_IND_ASG_LHS; + break; + case GT_NOP: // fgMorph sometimes inserts NOP nodes between defs and uses // supposedly 'to prevent constant folding'.
In this case, remove the @@ -931,19 +921,27 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G #endif // FEATURE_SIMD default: + // JCC nodes should not be present in HIR. + assert(node->OperGet() != GT_JCC); break; } // Do some extra processing on top-level nodes to remove unused local reads. - if (use.IsDummyUse() && node->OperIsLocalRead()) + if (node->OperIsLocalRead()) { - assert((node->gtFlags & GTF_ALL_EFFECT) == 0); - - comp->lvaDecRefCnts(node); - BlockRange().Remove(node); + if (use.IsDummyUse()) + { + comp->lvaDecRefCnts(node); + BlockRange().Remove(node); + } + else + { + // Local reads are side-effect-free; clear any flags leftover from frontend transformations. + node->gtFlags &= ~GTF_ALL_EFFECT; + } } - assert(isLateArg == ((node->gtFlags & GTF_LATE_ARG) != 0)); + assert(isLateArg == ((use.Def()->gtFlags & GTF_LATE_ARG) != 0)); return Compiler::WALK_CONTINUE; } diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp index 9dd7299906..8a7ad5a163 100644 --- a/src/jit/regalloc.cpp +++ b/src/jit/regalloc.cpp @@ -53,8 +53,6 @@ regMaskTP Compiler::raConfigRestrictMaskFP() return result; } -#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead. - #if DOUBLE_ALIGN DWORD Compiler::getCanDoubleAlign() { @@ -67,8 +65,84 @@ DWORD Compiler::getCanDoubleAlign() return DEFAULT_DOUBLE_ALIGN; #endif } + +//------------------------------------------------------------------------ +// shouldDoubleAlign: Determine whether to double-align the frame +// +// Arguments: +// refCntStk - sum of ref counts for all stack based variables +// refCntEBP - sum of ref counts for EBP enregistered variables +// refCntWtdEBP - sum of wtd ref counts for EBP enregistered variables +// refCntStkParam - sum of ref counts for all stack based parameters +// refCntWtdStkDbl - sum of wtd ref counts for stack based doubles (including structs +// with double fields). +// +// Return Value: +// Returns true if this method estimates that a double-aligned frame would be beneficial +// +// Notes: +// The impact of a double-aligned frame is computed as follows: +// - We save a byte of code for each parameter reference (they are frame-pointer relative) +// - We pay a byte of code for each non-parameter stack reference. +// - We save the misalignment penalty and possible cache-line crossing penalty. +// This is estimated as 0 for SMALL_CODE, 16 for FAST_CODE and 4 otherwise. +// - We pay 7 extra bytes for: +// MOV EBP,ESP, +// LEA ESP,[EBP-offset] +// AND ESP,-8 to double align ESP +// - We pay one extra memory reference for each variable that could have been enregistered in EBP (refCntWtdEBP). +// +// If the misalignment penalty is estimated to be less than the bytes used, we don't double align. +// Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the +// ref count for double-aligned values.
+// +bool Compiler::shouldDoubleAlign( + unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl) +{ + bool doDoubleAlign = false; + const unsigned DBL_ALIGN_SETUP_SIZE = 7; + + unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; + unsigned misaligned_weight = 4; + + if (compCodeOpt() == Compiler::SMALL_CODE) + misaligned_weight = 0; + + if (compCodeOpt() == Compiler::FAST_CODE) + misaligned_weight *= 4; + + JITDUMP("\nDouble alignment:\n"); + JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed); + JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP); + JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl); + + if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT)) + { + JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed); + } + else if (refCntWtdEBP > refCntWtdStkDbl * 2) + { + // TODO-CQ: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is + // not double aligned. + // Here are the numbers that make this not double-aligned. + // refCntWtdStkDbl = 0x164 + // refCntWtdEBP = 0x1a4 + // We think we do need to change the heuristic to be in favor of double-align. + + JITDUMP(" Predicting not to double-align ESP to allow EBP to be used to enregister variables.\n"); + } + else + { + // OK we passed all of the benefit tests, so we'll predict a double aligned frame. + JITDUMP(" Predicting to create a double-aligned frame\n"); + doDoubleAlign = true; + } + return doDoubleAlign; +} #endif // DOUBLE_ALIGN +#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead. + void Compiler::raInit() { #if FEATURE_STACK_FP_X87 @@ -2415,12 +2489,6 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree, { case GT_ASG: - if (tree->OperIsBlkOp()) - { - interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs); - regMask = 0; - goto RETURN_CHECK; - } /* Is the value being assigned into a LCL_VAR? */ if (op1->gtOper == GT_LCL_VAR) { @@ -2486,6 +2554,12 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree, } } } + else if (tree->OperIsBlkOp()) + { + interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs); + regMask = 0; + goto RETURN_CHECK; + } __fallthrough; case GT_CHS: @@ -4384,6 +4458,13 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree, case GT_ARR_LENGTH: goto GENERIC_UNARY; + case GT_INIT_VAL: + // This unary operator simply passes through the value from its child (much like GT_NOP) + // and thus won't need a scratch register. + regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs); + tree->gtUsedRegs = op1->gtUsedRegs; + goto RETURN_CHECK; + default: #ifdef DEBUG gtDispTree(tree); @@ -4525,7 +4606,7 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree, curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg - assert(list->IsList()); + assert(list->OperIsList()); args = list->Current(); list = list->Rest(); @@ -5840,114 +5921,14 @@ regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail) if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0)) { - /* OK, there may be some benefit to double-aligning the frame */ - /* But let us compare the benefits vs. 
the costs of this */ - - /* - One cost to consider is the benefit of smaller code - when using EBP as a frame pointer register - - Each stack variable reference is an extra byte of code - if we use a double-aligned frame, parameters are - accessed via EBP for a double-aligned frame so they - don't use an extra byte of code. - - We pay one byte of code for each refCntStk and we pay - one byte or more for each refCntEBP but we save one - byte for each refCntStkParam. - - Our savings are the elimination of a possible misaligned - access and a possible DCU spilt when an access crossed - a cache-line boundry. - - We use the loop weighted value of - refCntWtdStkDbl * misaligned_weight (0, 4, 16) - to represent this savings. - */ - - // We also pay 7 extra bytes for the MOV EBP,ESP, - // LEA ESP,[EBP-0x10] and the AND ESP,-8 to double align ESP - const unsigned DBL_ALIGN_SETUP_SIZE = 7; - - unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE; - unsigned misaligned_weight = 4; - - if (compCodeOpt() == SMALL_CODE) - misaligned_weight = 0; - - if (compCodeOpt() == FAST_CODE) - misaligned_weight *= 4; - -#ifdef DEBUG - if (verbose) - { - printf("; Double alignment:\n"); - printf("; Bytes that could be save by not using EBP frame: %i\n", bytesUsed); - printf("; Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP); - printf("; Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl); - } -#endif - - if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT)) - { - /* It's probably better to use EBP as a frame pointer */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (verbose) - printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed); -#endif - goto NO_DOUBLE_ALIGN; - } - - /* - Another cost to consider is the benefit of using EBP to enregister - one or more integer variables - - We pay one extra memory reference for each refCntWtdEBP - - Our savings are the elimination of a possible misaligned - access and a possible DCU spilt when an access crossed - a cache-line boundry. - - */ - - // <BUGNUM> - // VSW 346717: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is - // not double aligned. - // Here are the numbers that make this not double-aligned. - // refCntWtdStkDbl = 0x164 - // refCntWtdEBP = 0x1a4 - // We think we do need to change the heuristic to be in favor of double-align. 
- // </BUGNUM> - - if (refCntWtdEBP > refCntWtdStkDbl * 2) + if (shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl)) { - /* It's probably better to use EBP to enregister integer variables */ - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef DEBUG - if (verbose) - printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n"); -#endif - goto NO_DOUBLE_ALIGN; + rpFrameType = FT_DOUBLE_ALIGN_FRAME; + goto REVERSE_EBP_ENREG; } - -#ifdef DEBUG - if (verbose) - printf("; Predicting to create a double-aligned frame\n"); -#endif - /* - OK we passed all of the benefit tests - so we'll predict a double aligned frame - */ - - rpFrameType = FT_DOUBLE_ALIGN_FRAME; - goto REVERSE_EBP_ENREG; } } -NO_DOUBLE_ALIGN: #endif // DOUBLE_ALIGN if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired()) @@ -6673,8 +6654,6 @@ void Compiler::raMarkStkVars() #endif // FEATURE_FIXED_OUT_ARGS -#ifdef DEBUGGING_SUPPORT - #ifdef DEBUG /* For debugging, note that we have to reserve space even for unused variables if they are ever in scope. However, this is not @@ -6709,7 +6688,6 @@ void Compiler::raMarkStkVars() varDsc->lvMustInit = true; } } -#endif // DEBUGGING_SUPPORT #ifndef LEGACY_BACKEND varDsc->lvOnFrame = needSlot; diff --git a/src/jit/regalloc.h b/src/jit/regalloc.h index 7e2d7c7eb1..5054b4568e 100644 --- a/src/jit/regalloc.h +++ b/src/jit/regalloc.h @@ -17,6 +17,18 @@ enum FrameType #endif }; +#if DOUBLE_ALIGN +enum CanDoubleAlign +{ + CANT_DOUBLE_ALIGN, + CAN_DOUBLE_ALIGN, + MUST_DOUBLE_ALIGN, + COUNT_DOUBLE_ALIGN, + + DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN +}; +#endif + #ifdef LEGACY_BACKEND #include "varset.h" @@ -94,18 +106,6 @@ enum rpPredictReg #endif // _TARGET_ }; -#if DOUBLE_ALIGN -enum CanDoubleAlign -{ - CANT_DOUBLE_ALIGN, - CAN_DOUBLE_ALIGN, - MUST_DOUBLE_ALIGN, - COUNT_DOUBLE_ALIGN, - - DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN -}; -#endif - #endif // LEGACY_BACKEND #endif // REGALLOC_H_ diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp index 997c223ed4..3a3143e629 100644 --- a/src/jit/registerfp.cpp +++ b/src/jit/registerfp.cpp @@ -326,10 +326,8 @@ void CodeGen::genFloatAssign(GenTree* tree) bool unaligned = false; // Is this an unaligned store regNumber op2reg = REG_NA; -#ifdef DEBUGGING_SUPPORT unsigned lclVarNum = compiler->lvaCount; unsigned lclILoffs = DUMMY_INIT(0); -#endif noway_assert(tree->OperGet() == GT_ASG); @@ -358,7 +356,6 @@ void CodeGen::genFloatAssign(GenTree* tree) noway_assert(varNum < compiler->lvaCount); varDsc = compiler->lvaTable + varNum; -#ifdef DEBUGGING_SUPPORT // For non-debuggable code, every definition of a lcl-var has // to be checked to see if we need to open a new scope for it. // Remember the local var info to call siCheckVarScope @@ -369,7 +366,6 @@ void CodeGen::genFloatAssign(GenTree* tree) lclVarNum = varNum; lclILoffs = op1->gtLclVar.gtLclILoffs; } -#endif // Dead Store assert (with min opts we may have dead stores) // @@ -607,13 +603,11 @@ DONE_ASG: genUpdateLife(tree); -#ifdef DEBUGGING_SUPPORT /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. 
*/ if (lclVarNum < compiler->lvaCount) siCheckVarScope(lclVarNum, lclILoffs); -#endif } void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref) diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp index 2980f96813..0d0ac3e0ce 100644 --- a/src/jit/regset.cpp +++ b/src/jit/regset.cpp @@ -3175,6 +3175,16 @@ var_types Compiler::tmpNormalizeType(var_types type) type = genActualType(type); +#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_) + // For SIMD on 32-bit platforms, we always spill SIMD12 to a 16-byte SIMD16 temp. + // This is because we don't have a single instruction to store 12 bytes. We also + // allocate non-argument locals as 16 bytes; see lvSize(). + if (type == TYP_SIMD12) + { + type = TYP_SIMD16; + } +#endif // defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_) + #else // LEGACY_BACKEND if (!varTypeIsGC(type)) { diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp index f2a7902317..29c18f941c 100644 --- a/src/jit/scopeinfo.cpp +++ b/src/jit/scopeinfo.cpp @@ -58,10 +58,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "emit.h" #include "codegen.h" -/*****************************************************************************/ -#ifdef DEBUGGING_SUPPORT -/*****************************************************************************/ - bool Compiler::siVarLoc::vlIsInReg(regNumber reg) { switch (vlType) @@ -1050,7 +1046,6 @@ void CodeGen::psiBegProlog() void CodeGen::psiAdjustStackLevel(unsigned size) { -#ifdef DEBUGGING_SUPPORT if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) { return; @@ -1082,7 +1077,6 @@ void CodeGen::psiAdjustStackLevel(unsigned size) } #endif // ACCURATE_PROLOG_DEBUG_INFO -#endif // DEBUGGING_SUPPORT } /***************************************************************************** @@ -1094,7 +1088,6 @@ void CodeGen::psiAdjustStackLevel(unsigned size) void CodeGen::psiMoveESPtoEBP() { -#ifdef DEBUGGING_SUPPORT if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) { return; @@ -1127,7 +1120,6 @@ void CodeGen::psiMoveESPtoEBP() } #endif // ACCURATE_PROLOG_DEBUG_INFO -#endif // DEBUGGING_SUPPORT } /***************************************************************************** @@ -1141,7 +1133,6 @@ void CodeGen::psiMoveESPtoEBP() void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg) { -#ifdef DEBUGGING_SUPPORT assert(compiler->compGeneratingProlog); if (!compiler->opts.compScopeInfo) @@ -1195,7 +1186,6 @@ void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg) !"Parameter scope not found (Assert doesnt always indicate error)"); #endif // ACCURATE_PROLOG_DEBUG_INFO -#endif // DEBUGGING_SUPPORT } /***************************************************************************** @@ -1207,7 +1197,6 @@ void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg) void CodeGen::psiMoveToStack(unsigned varNum) { -#ifdef DEBUGGING_SUPPORT if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0)) { return; @@ -1248,7 +1237,6 @@ void CodeGen::psiMoveToStack(unsigned varNum) !"Parameter scope not found (Assert doesnt always indicate error)"); #endif // ACCURATE_PROLOG_DEBUG_INFO -#endif // DEBUGGING_SUPPORT } /***************************************************************************** @@ -1264,8 +1252,4 @@ void CodeGen::psiEndProlog() { psiEndPrologScope(scope); } -} - -/*****************************************************************************/ -#endif // 
DEBUGGING_SUPPORT -/*****************************************************************************/ +}
\ No newline at end of file diff --git a/src/jit/sideeffects.h b/src/jit/sideeffects.h index 33fac16f05..e14b2925ed 100644 --- a/src/jit/sideeffects.h +++ b/src/jit/sideeffects.h @@ -136,6 +136,12 @@ public: // SideEffectSet: // Represents a set of side effects for the purposes of analyzing code // motion. +// Note that for non-fixed-size frames without a frame pointer (currently +// x86-only), we don't track the modification of the stack level that occurs +// with a GT_PUTARG_STK as a side-effect. If we ever support general code +// reordering, that would have to be taken into account. As it happens, +// we currently do not reorder any other side-effecting nodes relative to +// these. // class SideEffectSet final { diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index 1f0c867b55..39664c47bf 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -77,10 +77,10 @@ int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd) // int Compiler::getSIMDTypeAlignment(var_types simdType) { -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ // Fixed length vectors have the following alignment preference - // Vector2/3 = 8 byte alignment - // Vector4 = 16-byte alignment + // Vector2 = 8 byte alignment + // Vector3/4 = 16-byte alignment unsigned size = genTypeSize(simdType); // preferred alignment for SSE2 128-bit vectors is 16-bytes @@ -88,13 +88,16 @@ int Compiler::getSIMDTypeAlignment(var_types simdType) { return 8; } - - // As per Intel manual, AVX vectors preferred alignment is 32-bytes but on Amd64 - // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even - // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in - // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX - // 256-bit vectors with unaligned load/stores to/from memory. - return 16; + else if (size <= 16) + { + assert((size == 12) || (size == 16)); + return 16; + } + else + { + assert(size == 32); + return 32; + } #else assert(!"getSIMDTypeAlignment() unimplemented on target arch"); unreached(); @@ -391,7 +394,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd; *baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes); - bool isHWAcceleratedIntrinsic = false; if (typeHnd == SIMDVectorHandle) { // All of the supported intrinsics on this static class take a first argument that's a vector, @@ -424,6 +426,16 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in return nullptr; } +#ifdef _TARGET_X86_ + // NYI: support LONG type SIMD intrinsics. Need support in long decomposition. + // (Don't use NYI fallback mechanism; just call the function.) + if ((*baseType == TYP_LONG) || (*baseType == TYP_ULONG)) + { + JITDUMP("NYI: x86 long base type SIMD intrinsics\n"); + return nullptr; + } +#endif // _TARGET_X86_ + // account for implicit "this" arg *argCount = sig->numArgs; if (sig->hasThis()) @@ -525,7 +537,8 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in // We don't check anything in that case. if (!isThisPtr || !isNewObj) { - GenTreePtr arg = impStackTop(stackIndex).val; + GenTreePtr arg = impStackTop(stackIndex).val; + var_types argType = arg->TypeGet(); var_types expectedArgType; if (argIndex < fixedArgCnt) @@ -540,6 +553,7 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in { // The type of the argument will be genActualType(*baseType). 
expectedArgType = genActualType(*baseType); + argType = genActualType(argType); } } else @@ -547,7 +561,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in expectedArgType = *baseType; } - var_types argType = arg->TypeGet(); if (!isThisPtr && argType == TYP_I_IMPL) { // The reference implementation has a constructor that takes a pointer. @@ -715,7 +728,7 @@ GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, var_types baseType, u return simdTree; } -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ // impSIMDLongRelOpEqual: transforms operands and returns the SIMD intrinsic to be applied on // transformed operands to obtain == comparison result. // @@ -741,7 +754,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // // Equality(v1, v2): // tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int> - // result = BitwiseAnd(t, shuffle(t, (2, 3, 1 0))) + // result = BitwiseAnd(t, shuffle(t, (2, 3, 0, 1))) // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements. // Compare vector<long> as if they were vector<int> and assign the result to a temp @@ -755,7 +768,7 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, // op2 = Shuffle(tmp, 0xB1) // IntrinsicId = BitwiseAnd *pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp); - *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT), + *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), SIMDIntrinsicShuffleSSE2, TYP_INT, size); return SIMDIntrinsicBitwiseAnd; } @@ -971,7 +984,7 @@ SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual( return SIMDIntrinsicBitwiseOr; } -#endif //_TARGET_AMD64_ +#endif // _TARGET_XARCH_ // Transforms operands and returns the SIMD intrinsic to be applied on // transformed operands to obtain given relop result. @@ -999,7 +1012,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, assert(isRelOpSIMDIntrinsic(relOpIntrinsicId)); -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ SIMDIntrinsicID intrinsicID = relOpIntrinsicId; var_types baseType = *inOutBaseType; @@ -1076,7 +1089,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, // // We need to treat op1 and op2 as signed for comparison purpose after // the transformation. - ssize_t constVal = 0; + __int64 constVal = 0; switch (baseType) { case TYP_UBYTE: @@ -1105,9 +1118,19 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, if (intrinsicID != SIMDIntrinsicEqual) { // For constructing const vector use either long or int base type. - var_types tempBaseType = (baseType == TYP_ULONG) ? 
TYP_LONG : TYP_INT; - GenTree* initVal = gtNewIconNode(constVal); - initVal->gtType = tempBaseType; + var_types tempBaseType; + GenTree* initVal; + if (baseType == TYP_ULONG) + { + tempBaseType = TYP_LONG; + initVal = gtNewLconNode(constVal); + } + else + { + tempBaseType = TYP_INT; + initVal = gtNewIconNode((ssize_t)constVal); + } + initVal->gtType = tempBaseType; GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size); // Assign constVector to a temp, since we intend to use it more than once @@ -1127,10 +1150,10 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, } return intrinsicID; -#else +#else // !_TARGET_XARCH_ assert(!"impSIMDRelOp() unimplemented on target arch"); unreached(); -#endif //_TARGET_AMD64_ +#endif // !_TARGET_XARCH_ } // Creates a GT_SIMD tree for Select operation @@ -1210,7 +1233,7 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, var_types simdType = op1->TypeGet(); assert(op2->TypeGet() == simdType); -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ // SSE2 has direct support for float/double/signed word/unsigned byte. // For other integer types we compute min/max as follows // @@ -1347,10 +1370,10 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, assert(simdTree != nullptr); return simdTree; -#else +#else // !_TARGET_XARCH_ assert(!"impSIMDMinMax() unimplemented on target arch"); unreached(); -#endif //_TARGET_AMD64_ +#endif // !_TARGET_XARCH_ } //------------------------------------------------------------------------ @@ -1791,6 +1814,8 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, int length = getSIMDVectorLength(clsHnd); GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, length); retVal = intConstTree; + + intConstTree->gtFlags |= GTF_ICON_SIMD_COUNT; } break; @@ -2223,7 +2248,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, assert(op2->TypeGet() == simdType); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; @@ -2234,7 +2263,11 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; @@ -2262,7 +2295,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicBitwiseOr: case SIMDIntrinsicBitwiseXor: { -#if defined(_TARGET_AMD64_) && defined(DEBUG) +#if defined(_TARGET_XARCH_) && defined(DEBUG) // check for the cases where we don't support intrinsics. // This check should be done before we make modifications to type stack. 
// Note that this is more of a double safety check for robustness since @@ -2290,7 +2323,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, return nullptr; } } -#endif //_TARGET_AMD64_ && DEBUG +#endif // _TARGET_XARCH_ && DEBUG // op1 is the first operand; if instance method, op1 is "this" arg // op2 is the second operand @@ -2331,9 +2364,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { // op1 is a SIMD variable that is "this" arg // op2 is an index of TYP_INT - op2 = impSIMDPopStack(TYP_INT); - op1 = impSIMDPopStack(simdType, instMethod); - unsigned int vectorLength = getSIMDVectorLength(size, baseType); + op2 = impSIMDPopStack(TYP_INT); + op1 = impSIMDPopStack(simdType, instMethod); + int vectorLength = getSIMDVectorLength(size, baseType); if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength) { // We need to bounds-check the length of the vector. @@ -2366,15 +2399,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicDotProduct: { -#if defined(_TARGET_AMD64_) && defined(DEBUG) - // Right now dot product is supported only for float vectors. - // See SIMDIntrinsicList.h for supported base types for this intrinsic. - if (!varTypeIsFloating(baseType)) +#if defined(_TARGET_XARCH_) + // Right now dot product is supported only for float/double vectors and + // int vectors on SSE4/AVX. + if (!varTypeIsFloating(baseType) && + !(baseType == TYP_INT && getSIMDInstructionSet() >= InstructionSet_SSE3_4)) { - assert(!"Dot product on integer type vectors not supported"); return nullptr; } -#endif //_TARGET_AMD64_ && DEBUG +#endif // _TARGET_XARCH_ // op1 is a SIMD variable that is the first source and also "this" arg. // op2 is a SIMD variable which is the second source. @@ -2382,13 +2415,17 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, op1 = impSIMDPopStack(simdType, instMethod); simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size); - retVal = simdTree; + if (simdType == TYP_SIMD12) + { + simdTree->gtFlags |= GTF_SIMD12_OP; + } + retVal = simdTree; } break; case SIMDIntrinsicSqrt: { -#if defined(_TARGET_AMD64_) && defined(DEBUG) +#if defined(_TARGET_XARCH_) && defined(DEBUG) // SSE/AVX doesn't support sqrt on integer type vectors and hence // should never be seen as an intrinsic here. See SIMDIntrinsicList.h // for supported base types for this intrinsic. @@ -2397,7 +2434,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, assert(!"Sqrt not supported on integer vectors\n"); return nullptr; } -#endif // _TARGET_AMD64_ && DEBUG +#endif // _TARGET_XARCH_ && DEBUG op1 = impSIMDPopStack(simdType); @@ -2409,7 +2446,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, { op1 = impSIMDPopStack(simdType); -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ if (varTypeIsFloating(baseType)) { // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff); @@ -2448,10 +2485,10 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, unreached(); } -#else //!_TARGET_AMD64_ - assert(!"Abs intrinsic on non-Amd64 target not implemented"); +#else // !_TARGET_XARCH_ + assert(!"Abs intrinsic on non-xarch target not implemented"); unreached(); -#endif //!_TARGET_AMD64_ +#endif // !_TARGET_XARCH_ } break; @@ -2524,15 +2561,15 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode, return nullptr; } -#ifdef _TARGET_AMD64_ - // Amd64: also indicate that we use floating point registers. +#ifdef _TARGET_XARCH_ + // XArch: also indicate that we use floating point registers. 
// The need for setting this here is that a method may not have SIMD // type lclvars, but might be exercising SIMD intrinsics on fields of // SIMD type. // // e.g. public Vector<float> ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; } compFloatingPointUsed = true; -#endif +#endif // _TARGET_XARCH_ // At this point, we have a tree that we are going to store into a destination. // TODO-1stClassStructs: This should be a simple store or assignment, and should not require diff --git a/src/jit/simd.h b/src/jit/simd.h index c68899e412..c4a8866b07 100644 --- a/src/jit/simd.h +++ b/src/jit/simd.h @@ -29,13 +29,18 @@ struct SIMDIntrinsicInfo var_types supportedBaseTypes[SIMD_INTRINSIC_MAX_BASETYPE_COUNT]; }; -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ // SSE2 Shuffle control byte to shuffle vector <W, Z, Y, X> // These correspond to shuffle immediate byte in shufps SSE2 instruction. -#define SHUFFLE_XXXX 0x00 -#define SHUFFLE_ZWYX 0xB1 -#define SHUFFLE_WWYY 0xF5 -#define SHUFFLE_ZZXX 0xA0 +#define SHUFFLE_XXXX 0x00 // 00 00 00 00 +#define SHUFFLE_XXWW 0x0F // 00 00 11 11 +#define SHUFFLE_XYZW 0x1B // 00 01 10 11 +#define SHUFFLE_YXYX 0x44 // 01 00 01 00 +#define SHUFFLE_YYZZ 0x5A // 01 01 10 10 +#define SHUFFLE_ZXXY 0x81 // 10 00 00 01 +#define SHUFFLE_ZWXY 0xB1 // 10 11 00 01 +#define SHUFFLE_WWYY 0xF5 // 11 11 01 01 +#define SHUFFLE_ZZXX 0xA0 // 10 10 00 00 #endif #endif // FEATURE_SIMD diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index 702f967aad..ec933fd5d7 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -17,7 +17,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator. -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ #include "emit.h" #include "codegen.h" #include "sideeffects.h" @@ -62,7 +62,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory. // AVX2 supports broadcast instructions to populate YMM reg with a single value from memory or mm reg. // If we decide to use AVX2 only, we can remove this assert. 
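// [Editor's illustrative sketch -- not part of this commit.] The SHUFFLE_* immediates defined in
// the simd.h hunk above pack four 2-bit lane selectors (X=0, Y=1, Z=2, W=3); the leftmost letter
// in the name occupies the highest bits of the shufps/pshufd control byte. A minimal helper that
// reproduces the table (helper name is hypothetical):

#include <cstdint>

constexpr uint8_t MakeShuffle(unsigned b76, unsigned b54, unsigned b32, unsigned b10)
{
    // Arguments are the lane codes named left-to-right, e.g. Z,W,X,Y for SHUFFLE_ZWXY.
    return static_cast<uint8_t>((b76 << 6) | (b54 << 4) | (b32 << 2) | b10);
}

static_assert(MakeShuffle(0, 0, 0, 0) == 0x00, "SHUFFLE_XXXX");
static_assert(MakeShuffle(2, 3, 0, 1) == 0xB1, "SHUFFLE_ZWXY");
static_assert(MakeShuffle(0, 1, 2, 3) == 0x1B, "SHUFFLE_XYZW");
static_assert(MakeShuffle(1, 0, 1, 0) == 0x44, "SHUFFLE_YXYX");
static_assert(MakeShuffle(3, 3, 1, 1) == 0xF5, "SHUFFLE_WWYY");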
- if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0) + if (!compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_USE_AVX2)) { assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE); } @@ -205,12 +205,9 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pmullw; } - else if (compiler->canUseAVX()) + else if ((baseType == TYP_INT) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) { - if (baseType == TYP_INT) - { - result = INS_pmulld; - } + result = INS_pmulld; } break; @@ -300,7 +297,8 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pcmpeqb; } - else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG)) + else if ((baseType == TYP_ULONG || baseType == TYP_LONG) && + (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) { result = INS_pcmpeqq; } @@ -359,7 +357,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { result = INS_pcmpgtb; } - else if (compiler->canUseAVX() && (baseType == TYP_LONG)) + else if ((baseType == TYP_LONG) && (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)) { result = INS_pcmpgtq; } @@ -464,7 +462,8 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // to target mm reg, zeroing out the upper bits if and only if specified. // // Arguments: -// type the type of value to be moved +// targetType the target type +// baseType the base type of value to be moved // targetReg the target reg // srcReg the src reg // moveType action to be performed on target upper bits @@ -475,10 +474,10 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // Notes: // This is currently only supported for floating point types. // -void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType) +void CodeGen::genSIMDScalarMove( + var_types targetType, var_types baseType, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType) { - var_types targetType = compiler->getSIMDVectorType(); - assert(varTypeIsFloating(type)); + assert(varTypeIsFloating(baseType)); #ifdef FEATURE_AVX_SUPPORT if (compiler->getSIMDInstructionSet() == InstructionSet_AVX) { @@ -487,17 +486,17 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s case SMT_PreserveUpper: if (srcReg != targetReg) { - instruction ins = ins_Store(type); + instruction ins = ins_Store(baseType); if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins)) { // In general, when we use a three-operands move instruction, we want to merge the src with // itself. This is an exception in that we actually want the "merge" behavior, so we must // specify it with all 3 operands. 
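// [Editor's illustrative sketch -- not part of this commit.] The "merge" behavior discussed above
// is what a register-to-register movss (or the AVX three-operand vmovss) provides: the scalar from
// the source lands in lane 0 while the destination's upper lanes survive. Assuming SSE:

#include <xmmintrin.h>

// SMT_PreserveUpper in miniature: insert src's scalar into lane 0, keep target's lanes 1..3.
__m128 PreserveUpperMove(__m128 target, __m128 src)
{
    return _mm_move_ss(target, src); // movss: (src[0], target[1], target[2], target[3])
}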
- inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType)); + inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(baseType)); } else { - inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType)); + inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); } } break; @@ -516,9 +515,9 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s case SMT_ZeroInitUpper_SrcHasUpperZeros: if (srcReg != targetReg) { - instruction ins = ins_Copy(type); + instruction ins = ins_Copy(baseType); assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins)); - inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType)); + inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); } break; @@ -536,7 +535,7 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s case SMT_PreserveUpper: if (srcReg != targetReg) { - inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType)); + inst_RV_RV(ins_Store(baseType), targetReg, srcReg, baseType, emitTypeSize(baseType)); } break; @@ -545,22 +544,22 @@ void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber s { // There is no guarantee that upper bits of op1Reg are zero. // We achieve this by using left logical shift 12-bytes and right logical shift 12 bytes. - instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type); + instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType); getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type); + ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType); getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12); } else { genSIMDZero(targetType, TYP_FLOAT, targetReg); - inst_RV_RV(ins_Store(type), targetReg, srcReg); + inst_RV_RV(ins_Store(baseType), targetReg, srcReg); } break; case SMT_ZeroInitUpper_SrcHasUpperZeros: if (srcReg != targetReg) { - inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType)); + inst_RV_RV(ins_Copy(baseType), targetReg, srcReg, baseType, emitTypeSize(baseType)); } break; @@ -676,7 +675,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) SIMDScalarMoveType moveType = op1->IsCnsFltOrDbl() || op1->isMemoryOp() ? 
SMT_ZeroInitUpper_SrcHasUpperZeros : SMT_ZeroInitUpper; - genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, moveType); + genSIMDScalarMove(targetType, TYP_FLOAT, targetReg, op1Reg, moveType); if (size == 8) { @@ -786,7 +785,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) { getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize); } - genSIMDScalarMove(baseType, vectorReg, operandReg, SMT_PreserveUpper); + genSIMDScalarMove(targetType, baseType, vectorReg, operandReg, SMT_PreserveUpper); offset += baseTypeSize; } @@ -1033,11 +1032,10 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->gtRegNum; - assert(targetReg != REG_NA); + GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op2 = simdNode->gtGetOp2(); + var_types baseType = simdNode->gtSIMDBaseType; + regNumber targetReg = simdNode->gtRegNum; var_types targetType = simdNode->TypeGet(); InstructionSet iset = compiler->getSIMDInstructionSet(); @@ -1051,8 +1049,16 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) case SIMDIntrinsicEqual: case SIMDIntrinsicGreaterThan: { - // SSE2: vector<(u)long> relation op should be implemented in terms of TYP_INT comparison operations - assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) && (baseType != TYP_ULONG)); + assert(targetReg != REG_NA); + +#ifdef DEBUG + // SSE2: vector<(u)long> relational op should be implemented in terms of + // TYP_INT comparison operations + if (baseType == TYP_LONG || baseType == TYP_ULONG) + { + assert(iset >= InstructionSet_SSE3_4); + } +#endif // Greater-than: Floating point vectors use "<" with swapped operands if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan) @@ -1093,6 +1099,8 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) case SIMDIntrinsicLessThan: case SIMDIntrinsicLessThanOrEqual: { + assert(targetReg != REG_NA); + // Int vectors use ">" and ">=" with swapped operands assert(varTypeIsFloating(baseType)); @@ -1115,17 +1123,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) case SIMDIntrinsicOpEquality: case SIMDIntrinsicOpInEquality: { - assert(genIsValidIntReg(targetReg)); - - // We need two additional XMM register as scratch - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 2); - - regMaskTP tmpRegsMask = simdNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); var_types simdType = op1->TypeGet(); // TODO-1stClassStructs: Temporary to minimize asmDiffs if (simdType == TYP_DOUBLE) @@ -1140,96 +1137,111 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) simdType = TYP_SIMD16; } - // tmpReg1 = (op1Reg == op2Reg) - // Call this value of tmpReg1 as 'compResult' for further reference below. - regNumber otherReg = op2Reg; - if (tmpReg1 != op2Reg) + // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest. 
+ if ((compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0)) { - if (tmpReg1 != op1Reg) - { - inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType)); - } + assert(op2->isContained()); + inst_RV_RV(INS_ptest, op1->gtRegNum, op1->gtRegNum, simdType, emitActualTypeSize(simdType)); } else { - otherReg = op1Reg; - } + // We need one additional SIMD register to store the result of the SIMD compare. + regNumber tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLFLOAT); - // For all integer types we can use TYP_INT comparison. - unsigned ival = 0; - instruction ins = - getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival); + // tmpReg1 = (op1Reg == op2Reg) + // Call this value of tmpReg1 as 'compResult' for further reference below. + regNumber otherReg = op2Reg; + if (tmpReg1 != op2Reg) + { + if (tmpReg1 != op1Reg) + { + inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType)); + } + } + else + { + otherReg = op1Reg; + } - if (varTypeIsFloating(baseType)) - { - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival); - } - else - { - inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType)); + // For all integer types we can use TYP_INT comparison. + unsigned ival = 0; + instruction ins = + getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival); + + if (varTypeIsFloating(baseType)) + { + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival); + } + else + { + inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType)); + } + + regNumber intReg; + if (targetReg == REG_NA) + { + // If we are not materializing result into a register, + // we would have reserved an int type internal register. + intReg = genRegNumFromMask(simdNode->gtRsvdRegs & RBM_ALLINT); + } + else + { + // We can use targetReg for setting flags. + intReg = targetReg; + + // Must have not reserved any int type internal registers. + assert(genCountBits(simdNode->gtRsvdRegs & RBM_ALLINT) == 0); + } + + inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType)); + // There's no pmovmskw/pmovmskd/pmovmskq but they're not needed anyway. Vector compare + // instructions produce "all ones"/"all zeroes" components and pmovmskb extracts a + // subset of each component's ones/zeroes. In the end we need to know if the result is + // "all ones" where the number of ones is given by the vector byte size, not by the + // vector component count. So, for AVX registers we need to compare to 0xFFFFFFFF and + // for SSE registers we need to compare to 0x0000FFFF. + // The SIMD12 case is handled specially, because we can't rely on the upper bytes being + // zero, so we must compare only the lower 3 floats (hence the byte mask of 0xFFF). + // Note that -1 is used instead of 0xFFFFFFFF, on x64 emit doesn't correctly recognize + // that 0xFFFFFFFF can be encoded in a single byte and emits the longer 3DFFFFFFFF + // encoding instead of 83F8FF. 
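// [Editor's illustrative sketch -- not part of this commit.] The two code paths above reduce to
// the following intrinsic patterns: ptest for (in)equality against the zero vector on SSE4+, and
// a per-lane compare plus pmovmskb for the general case, where Vector3 tests only the low 12 bytes.

#include <smmintrin.h> // SSE4.1

// Zero-vector test: ptest sets ZF when (v & v) == 0, i.e. every bit of v is zero.
bool IsZeroVector(__m128i v)
{
    return _mm_testz_si128(v, v) != 0;
}

// General float-vector equality: compare per lane, collapse to a 16-bit byte mask, test the mask.
bool VectorsEqual(__m128 a, __m128 b, bool isVector3)
{
    __m128i eq   = _mm_castps_si128(_mm_cmpeq_ps(a, b)); // all-ones / all-zeroes per lane
    int     mask = _mm_movemask_epi8(eq);                 // one bit per byte of the XMM register
    return isVector3 ? ((mask & 0xFFF) == 0xFFF)          // only the low 3 floats are significant
                     : (mask == 0xFFFF);
}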
+ ssize_t mask; + if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) + { + mask = 0x00000FFF; + getEmitter()->emitIns_R_I(INS_and, EA_4BYTE, intReg, mask); + } + else if (emitActualTypeSize(simdType) == 32) + { + mask = -1; + } + else + { + mask = 0x0000FFFF; + } + getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask); } - // If we have 32 bytes, start by anding the two 16-byte halves to get a 16-byte result. - if (compiler->canUseAVX() && (simdType == TYP_SIMD32)) + if (targetReg != REG_NA) { - // Reduce tmpReg1 from 256-bits to 128-bits bitwise-Anding the lower and uppper 128-bits + // If we need to materialize result into a register, targetReg needs to + // be set to 1 on true and zero on false. + // Equality: + // cmp targetReg, 0xFFFFFFFF or 0xFFFF + // sete targetReg + // movzx targetReg, targetReg // - // Generated code sequence - // - vextractf128 tmpReg2, tmpReg1, 0x01 - // tmpReg2[128..255] <- 0 - // tmpReg2[0..127] <- tmpReg1[128..255] - // - vandps tmpReg1, tempReg2 - // This will zero-out upper portion of tmpReg1 and - // lower portion of tmpReg1 is and of upper and lower 128-bit comparison result. - getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01); - inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); - } - // Next, if we have more than 8 bytes, and the two 8-byte halves to get a 8-byte result. - if (simdType != TYP_SIMD8) - { - // tmpReg2 = Shuffle(tmpReg1, (1,0,3,2)) - // Note: vpshufd is a 128-bit only instruction. Therefore, explicitly pass EA_16BYTE - getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x4E); - - // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2) + // InEquality: + // cmp targetReg, 0xFFFFFFFF or 0xFFFF + // setne targetReg + // movzx targetReg, targetReg // - // Note that what we have computed is as follows at this point: - // tmpReg1[0] = compResult[0] & compResult[2] - // tmpReg1[1] = compResult[1] & compResult[3] - inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); + assert(simdNode->TypeGet() == TYP_INT); + inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, + TYP_INT, EA_1BYTE); + // Set the higher bytes to 0 + inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); } - // At this point, we have either reduced the result to 8 bytes: tmpReg1[0] and tmpReg1[1], - // OR we have a Vector2 (TYP_SIMD8) in tmpReg1, which has only those two fields. - - // tmpReg2 = Shuffle(tmpReg1, (0,0,0,1)) - // tmpReg2[0] = compResult[1] & compResult[3] - getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x1); - - // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2) - // That is tmpReg1[0] = compResult[0] & compResult[1] & compResult[2] & compResult[3] - inst_RV_RV(INS_pand, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); // ??? INS_andps?? - - // targetReg = lower 32-bits of tmpReg1 = compResult[0] & compResult[1] & compResult[2] & compResult[3] - // (Note that for mov_xmm2i, the int register is always in the reg2 position. - inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT); - - // Since we need to compute a bool result, targetReg needs to be set to 1 on true and zero on false. 
- // Equality: - // cmp targetReg, 0xFFFFFFFF - // sete targetReg - // movzx targetReg, targetReg - // - // InEquality: - // cmp targetReg, 0xFFFFFFFF - // setne targetReg - // movzx targetReg, targetReg - // - getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0xFFFFFFFF); - inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT, - EA_1BYTE); - assert(simdNode->TypeGet() == TYP_INT); - // Set the higher bytes to 0 - inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); } break; @@ -1267,45 +1279,68 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) regNumber targetReg = simdNode->gtRegNum; assert(targetReg != REG_NA); - // DotProduct is only supported on floating point types. var_types targetType = simdNode->TypeGet(); assert(targetType == baseType); - assert(varTypeIsFloating(baseType)); genConsumeOperands(simdNode); - regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = op2->gtRegNum; + regNumber op1Reg = op1->gtRegNum; + regNumber op2Reg = op2->gtRegNum; + regNumber tmpReg1 = REG_NA; + regNumber tmpReg2 = REG_NA; - regNumber tmpReg = REG_NA; - // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch. - // However, it must be distinct from targetReg, so we request two from the register allocator. - // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. - if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32)) + InstructionSet iset = compiler->getSIMDInstructionSet(); + + // Dot product intrinsic is supported only on float/double vectors + // and 32-byte int vectors on AVX. + // + // Float/Double Vectors: + // For SSE, or AVX with 32-byte vectors, we need one additional Xmm register + // different from targetReg as scratch. Note that if this is a TYP_SIMD16 or + // smaller on AVX, then we don't need a tmpReg. + // + // 32-byte integer vector on AVX: we need two additional Xmm registers + // different from targetReg as scratch. + // + // 16-byte integer vector on SSE4: we need one additional Xmm register + // different from targetReg as scratch. + if (varTypeIsFloating(baseType)) { - assert(simdNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(simdNode->gtRsvdRegs) == 2); + if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32)) + { + assert(simdNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(simdNode->gtRsvdRegs) == 1); - regMaskTP tmpRegsMask = simdNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); + tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); + assert(tmpReg1 != REG_NA); + assert(tmpReg1 != targetReg); + } + else + { + assert(simdNode->gtRsvdRegs == RBM_NONE); + } + } + else + { + assert(baseType == TYP_INT); + assert(iset >= InstructionSet_SSE3_4); - // Choose any register different from targetReg as tmpReg - if (tmpReg1 != targetReg) + if (iset == InstructionSet_SSE3_4) { - tmpReg = tmpReg1; + // Must have reserved 1 scratch register. + assert(genCountBits(simdNode->gtRsvdRegs) == 1); + tmpReg1 = genRegNumFromMask(simdNode->gtRsvdRegs); } else { - assert(targetReg != tmpReg2); - tmpReg = tmpReg2; + // Must have reserved 2 scratch registers. 
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2); + regMaskTP tmpRegMask = genFindLowestBit(simdNode->gtRsvdRegs); + tmpReg1 = genRegNumFromMask(tmpRegMask); + tmpReg2 = genRegNumFromMask(simdNode->gtRsvdRegs & ~tmpRegMask); } - assert(tmpReg != REG_NA); - assert(tmpReg != targetReg); } - if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2) + if (iset == InstructionSet_SSE2) { // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg if (op1Reg == targetReg) @@ -1323,96 +1358,187 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) } // DotProduct(v1, v2) - // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg - if (baseType == TYP_FLOAT) + // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg1 + if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) + { + assert(baseType == TYP_FLOAT); + // v0 = v1 * v2 + // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its + // // position + // tmp = shuffle(tmp, tmp, SHUFFLE_ZXXY) // tmp = (2, 0, 0, 1) - don't really care what's in upper + // // bits + // v0 = v0 + tmp // v0 = (3+2, 0+2, 1+0, 0+1) + // tmp = shuffle(tmp, tmp, SHUFFLE_XXWW) // tmp = ( 1, 1, 2, 2) + // v0 = v0 + tmp // v0 = (1+2+3, 0+1+2, 0+1+2, 0+1+2) + // + inst_RV_RV(INS_mulps, targetReg, op2Reg); + inst_RV_RV(INS_movaps, tmpReg1, targetReg); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZXXY); + inst_RV_RV(INS_addps, targetReg, tmpReg1); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XXWW); + inst_RV_RV(INS_addps, targetReg, tmpReg1); + } + else if (baseType == TYP_FLOAT) { // v0 = v1 * v2 // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its // // position - // tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1) + // tmp = shuffle(tmp, tmp, SHUFFLE_ZWXY) // tmp = (2, 3, 0, 1) // v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1) // tmp = v0 - // tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2) + // tmp = shuffle(tmp, tmp, SHUFFLE_XYZW) // tmp = (0+1, 1+0, 2+3, 3+2) // v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3) // // Essentially horizontal addtion of all elements. // // We could achieve the same using SSEv3 instruction // // HADDPS. 
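// [Editor's illustrative sketch -- not part of this commit.] The SSE2 float reduction spelled out
// in the comments above corresponds to this intrinsic sequence:

#include <xmmintrin.h>

// Vector4 dot product via the mulps/shufps/addps reduction described above.
float Dot4(__m128 v1, __m128 v2)
{
    __m128 v = _mm_mul_ps(v1, v2);         // (3, 2, 1, 0) element-wise products
    __m128 t = _mm_shuffle_ps(v, v, 0xB1); // SHUFFLE_ZWXY: (2, 3, 0, 1)
    v        = _mm_add_ps(v, t);           // (3+2, 2+3, 1+0, 0+1)
    t        = _mm_shuffle_ps(v, v, 0x1B); // SHUFFLE_XYZW: lanes reversed
    v        = _mm_add_ps(v, t);           // every lane now holds 0+1+2+3
    return _mm_cvtss_f32(v);               // scalar result from lane 0
}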
// inst_RV_RV(INS_mulps, targetReg, op2Reg); - inst_RV_RV(INS_movaps, tmpReg, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0xb1); - inst_RV_RV(INS_addps, targetReg, tmpReg); - inst_RV_RV(INS_movaps, tmpReg, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0x1b); - inst_RV_RV(INS_addps, targetReg, tmpReg); + inst_RV_RV(INS_movaps, tmpReg1, targetReg); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZWXY); + inst_RV_RV(INS_addps, targetReg, tmpReg1); + inst_RV_RV(INS_movaps, tmpReg1, targetReg); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XYZW); + inst_RV_RV(INS_addps, targetReg, tmpReg1); } - else if (baseType == TYP_DOUBLE) + else { + assert(baseType == TYP_DOUBLE); + // v0 = v1 * v2 // tmp = v0 // v0 = (1, 0) - each element is given by its position // tmp = shuffle(tmp, tmp, Shuffle(0,1)) // tmp = (0, 1) // v0 = v0 + tmp // v0 = (1+0, 0+1) inst_RV_RV(INS_mulpd, targetReg, op2Reg); - inst_RV_RV(INS_movaps, tmpReg, targetReg); - inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg, tmpReg, 0x01); - inst_RV_RV(INS_addpd, targetReg, tmpReg); - } - else - { - unreached(); + inst_RV_RV(INS_movaps, tmpReg1, targetReg); + inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg1, tmpReg1, 0x01); + inst_RV_RV(INS_addpd, targetReg, tmpReg1); } } else { - // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg. - // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually - // use the 3-op form, so that we can avoid these copies. - // TODO-CQ: Add inst_RV_RV_RV_IV(). - if (op1Reg == targetReg) - { - // Best case - // nothing to do, we have registers in the right place - } - else if (op2Reg == targetReg) + assert(iset >= InstructionSet_SSE3_4); + + if (varTypeIsFloating(baseType)) { - op2Reg = op1Reg; + // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg. + // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually + // use the 3-op form, so that we can avoid these copies. + // TODO-CQ: Add inst_RV_RV_RV_IV(). + if (op1Reg == targetReg) + { + // Best case + // nothing to do, we have registers in the right place + } + else if (op2Reg == targetReg) + { + op2Reg = op1Reg; + } + else + { + inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType)); + } + + emitAttr emitSize = emitActualTypeSize(simdEvalType); + if (baseType == TYP_FLOAT) + { + // dpps computes the dot product of the upper & lower halves of the 32-byte register. + // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. + unsigned mask = ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) ? 0x71 : 0xf1; + inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, mask); + // dpps computes the dot product of the upper & lower halves of the 32-byte register. + // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. + // If this is TYP_SIMD32, we need to combine the lower & upper results. 
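// [Editor's illustrative sketch -- not part of this commit.] In the dpps immediates used above,
// the high nibble selects which lanes are multiplied and summed and the low nibble selects which
// result lanes receive the sum, hence 0xF1 for Vector4 and 0x71 for Vector3:

#include <smmintrin.h> // SSE4.1

float DppsDot4(__m128 a, __m128 b)
{
    return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xF1)); // all four lanes in, sum written to lane 0
}

float DppsDot3(__m128 a, __m128 b)
{
    return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x71)); // lanes 0..2 only (Vector3), sum written to lane 0
}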
+ if (simdEvalType == TYP_SIMD32) + { + getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg1, targetReg, 0x01); + inst_RV_RV(INS_addps, targetReg, tmpReg1, targetType, emitTypeSize(targetType)); + } + } + else if (baseType == TYP_DOUBLE) + { + if (simdEvalType == TYP_SIMD32) + { + // targetReg = targetReg * op2Reg + // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves + // tmpReg = vextractf128(targetReg, 1) ; Moves the upper sum into tempReg + // targetReg = targetReg + tmpReg1 + inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType)); + inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType)); + getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg1, targetReg, 0x01); + inst_RV_RV(INS_addpd, targetReg, tmpReg1, targetType, emitTypeSize(targetType)); + } + else + { + // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use + // dppd directly. + assert(iset == InstructionSet_SSE3_4); + inst_RV_RV_IV(INS_dppd, emitSize, targetReg, op2Reg, 0x31); + } + } } else { - inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType)); - } + // Dot product of 32-byte int vector on SSE4/AVX. + assert(baseType == TYP_INT); + assert(simdEvalType == TYP_SIMD16 || simdEvalType == TYP_SIMD32); + +#ifdef DEBUG + // SSE4: We need 1 scratch register. + // AVX2: We need 2 scratch registers. + if (simdEvalType == TYP_SIMD16) + { + assert(tmpReg1 != REG_NA); + } + else + { + assert(tmpReg1 != REG_NA); + assert(tmpReg2 != REG_NA); + } +#endif + + // tmpReg1 = op1 * op2 + if (iset == InstructionSet_AVX) + { + // On AVX take advantage 3 operand form of pmulld + inst_RV_RV_RV(INS_pmulld, tmpReg1, op1Reg, op2Reg, emitTypeSize(simdEvalType)); + } + else + { + inst_RV_RV(ins_Copy(simdEvalType), tmpReg1, op1Reg, simdEvalType); + inst_RV_RV(INS_pmulld, tmpReg1, op2Reg, simdEvalType); + } - emitAttr emitSize = emitActualTypeSize(simdEvalType); - if (baseType == TYP_FLOAT) - { - // dpps computes the dot product of the upper & lower halves of the 32-byte register. - // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. - inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, 0xf1); - // If this is TYP_SIMD32, we need to combine the lower & upper results. if (simdEvalType == TYP_SIMD32) { - getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01); - inst_RV_RV(INS_addps, targetReg, tmpReg, targetType, emitTypeSize(targetType)); + // tmpReg2[127..0] = Upper 128-bits of tmpReg1 + getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01); + + // tmpReg1[127..0] = tmpReg1[127..0] + tmpReg2[127..0] + // This will compute + // tmpReg1[0] = op1[0]*op2[0] + op1[4]*op2[4] + // tmpReg1[1] = op1[1]*op2[1] + op1[5]*op2[5] + // tmpReg1[2] = op1[2]*op2[2] + op1[6]*op2[6] + // tmpReg1[4] = op1[4]*op2[4] + op1[7]*op2[7] + inst_RV_RV(INS_paddd, tmpReg1, tmpReg2, TYP_SIMD16, EA_16BYTE); } - } - else if (baseType == TYP_DOUBLE) - { - // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use - // dppd directly. 
- assert(simdType == TYP_SIMD32); - - // targetReg = targetReg * op2Reg - // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves - // tmpReg = vextractf128(targetReg, 1) ; Moves the upper sum into tempReg - // targetReg = targetReg + tmpReg - inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType)); - inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType)); - getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01); - inst_RV_RV(INS_addpd, targetReg, tmpReg, targetType, emitTypeSize(targetType)); - } - else - { - unreached(); + + // This horizontal add will compute + // + // TYP_SIMD16: + // tmpReg1[0] = tmpReg1[2] = op1[0]*op2[0] + op1[1]*op2[1] + // tmpReg1[1] = tmpReg1[3] = op1[2]*op2[2] + op1[4]*op2[4] + // + // TYP_SIMD32: + // tmpReg1[0] = tmpReg1[2] = op1[0]*op2[0] + op1[4]*op2[4] + op1[1]*op2[1] + op1[5]*op2[5] + // tmpReg1[1] = tmpReg1[3] = op1[2]*op2[2] + op1[6]*op2[6] + op1[4]*op2[4] + op1[7]*op2[7] + inst_RV_RV(INS_phaddd, tmpReg1, tmpReg1, TYP_SIMD16, EA_16BYTE); + + // DotProduct(op1, op2) = tmpReg1[0] = tmpReg1[0] + tmpReg1[1] + inst_RV_RV(INS_phaddd, tmpReg1, tmpReg1, TYP_SIMD16, EA_16BYTE); + + // TargetReg = integer result from tmpReg1 + // (Note that for mov_xmm2i, the int register is always in the reg2 position) + inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT); } } @@ -1456,6 +1582,59 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) genConsumeOperands(simdNode); regNumber srcReg = op1->gtRegNum; + // Optimize the case of op1 is in memory and trying to access ith element. + if (op1->isMemoryOp()) + { + assert(op1->isContained()); + + regNumber baseReg; + regNumber indexReg; + int offset = 0; + + if (op1->OperGet() == GT_LCL_FLD) + { + // There are three parts to the total offset here: + // {offset of local} + {offset of SIMD Vector field} + {offset of element within SIMD vector}. + bool isEBPbased; + unsigned varNum = op1->gtLclVarCommon.gtLclNum; + offset += compiler->lvaFrameAddress(varNum, &isEBPbased); + offset += op1->gtLclFld.gtLclOffs; + + baseReg = (isEBPbased) ? REG_EBP : REG_ESP; + } + else + { + // Require GT_IND addr to be not contained. + assert(op1->OperGet() == GT_IND); + + GenTree* addr = op1->AsIndir()->Addr(); + assert(!addr->isContained()); + baseReg = addr->gtRegNum; + } + + if (op2->isContainedIntOrIImmed()) + { + indexReg = REG_NA; + offset += (int)op2->AsIntConCommon()->IconValue() * genTypeSize(baseType); + } + else + { + indexReg = op2->gtRegNum; + assert(genIsValidIntReg(indexReg)); + } + + // Now, load the desired element. + getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load + emitTypeSize(baseType), // Of the vector baseType + targetReg, // To targetReg + baseReg, // Base Reg + indexReg, // Indexed + genTypeSize(baseType), // by the size of the baseType + offset); + genProduceReg(simdNode); + return; + } + // SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant. // For the non-constant case, we will use the SIMD temp location to store the vector, and // the load the desired element. 
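// [Editor's illustrative sketch -- not part of this commit.] Summarizing the SSE4 integer
// dot-product sequence above (pmulld, then two phaddd reductions, then mov_xmm2i) for the
// 16-byte Vector<int> case:

#include <smmintrin.h> // SSE4.1 for pmulld; phaddd is SSSE3

int DotInt4(__m128i a, __m128i b)
{
    __m128i p = _mm_mullo_epi32(a, b); // pmulld: per-lane 32-bit products p0..p3
    p         = _mm_hadd_epi32(p, p);  // phaddd: (p0+p1, p2+p3, p0+p1, p2+p3)
    p         = _mm_hadd_epi32(p, p);  // phaddd: every lane = p0+p1+p2+p3
    return _mm_cvtsi128_si32(p);       // mov_xmm2i: extract lane 0
}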
@@ -1839,26 +2018,9 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) // Need an addtional Xmm register to read upper 4 bytes, which is different from targetReg assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 2); - - regNumber tmpReg = REG_NA; - regMaskTP tmpRegsMask = treeNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); - // Choose any register different from targetReg as tmpReg - if (tmpReg1 != targetReg) - { - tmpReg = tmpReg1; - } - else - { - assert(targetReg != tmpReg2); - tmpReg = tmpReg2; - } - assert(tmpReg != REG_NA); + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); assert(tmpReg != targetReg); // Load upper 4 bytes in tmpReg @@ -1868,7 +2030,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); genProduceReg(treeNode); } @@ -1912,9 +2074,9 @@ void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode) } //----------------------------------------------------------------------------- -// genLoadLclFldTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type field. -// Since Vector3 is not a hardware supported write size, it is performed -// as two reads: 8 byte followed by 4-byte. +// genLoadLclTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported read size, it is performed +// as two reads: 4 byte followed by 8 byte. // // Arguments: // treeNode - tree node that is attempting to load TYP_SIMD12 field @@ -1922,37 +2084,26 @@ void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode) // Return Value: // None. // -void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode) +void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode) { - assert(treeNode->OperGet() == GT_LCL_FLD); + assert((treeNode->OperGet() == GT_LCL_FLD) || (treeNode->OperGet() == GT_LCL_VAR)); regNumber targetReg = treeNode->gtRegNum; - unsigned offs = treeNode->gtLclFld.gtLclOffs; + unsigned offs = 0; unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; assert(varNum < compiler->lvaCount); - // Need an addtional Xmm register to read upper 4 bytes - assert(treeNode->gtRsvdRegs != RBM_NONE); - assert(genCountBits(treeNode->gtRsvdRegs) == 2); - - regNumber tmpReg = REG_NA; - regMaskTP tmpRegsMask = treeNode->gtRsvdRegs; - regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask); - tmpRegsMask &= ~tmpReg1Mask; - regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask); - regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask); - - // Choose any register different from targetReg as tmpReg - if (tmpReg1 != targetReg) + if (treeNode->OperGet() == GT_LCL_FLD) { - tmpReg = tmpReg1; + offs = treeNode->gtLclFld.gtLclOffs; } - else - { - assert(targetReg != tmpReg2); - tmpReg = tmpReg2; - } - assert(tmpReg != REG_NA); + + // Need an additional Xmm register that is different from + // targetReg to read upper 4 bytes. 
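// [Editor's illustrative sketch -- not part of this commit.] The two-part TYP_SIMD12 (Vector3)
// load described above -- an 8-byte read of x/y, a 4-byte read of z, then shufps with
// SHUFFLE_YXYX (0x44) to combine them -- looks like this with intrinsics:

#include <emmintrin.h>

__m128 LoadVector3(const float* p)
{
    __m128 lo = _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)p)); // movq:  (x, y, 0, 0)
    __m128 hi = _mm_load_ss(p + 2);                                   // movss: (z, 0, 0, 0)
    return _mm_shuffle_ps(lo, hi, 0x44); // SHUFFLE_YXYX: (lo[0], lo[1], hi[0], hi[1]) = (x, y, z, 0)
}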
+ assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); assert(tmpReg != targetReg); // Read upper 4 bytes to tmpReg @@ -1962,11 +2113,54 @@ void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode) getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); genProduceReg(treeNode); } +#ifdef _TARGET_X86_ + +//----------------------------------------------------------------------------- +// genPutArgStkSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field. +// Since Vector3 is not a hardware supported write size, it is performed +// as two stores: 8 byte followed by 4-byte. +// +// Arguments: +// treeNode - tree node that is attempting to store TYP_SIMD12 field +// +// Return Value: +// None. +// +void CodeGen::genPutArgStkSIMD12(GenTree* treeNode) +{ + assert(treeNode->OperGet() == GT_PUTARG_STK); + + GenTreePtr op1 = treeNode->gtOp.gtOp1; + assert(!op1->isContained()); + regNumber operandReg = genConsumeReg(op1); + + // Need an addtional Xmm register to extract upper 4 bytes from data. + assert(treeNode->gtRsvdRegs != RBM_NONE); + assert(genCountBits(treeNode->gtRsvdRegs) == 1); + regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); + + // Subtract from ESP; create space for argument. + // TODO-CQ: use 'push' instead? + inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE); + genStackLevel += 12; + + // 8-byte write + getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, REG_SPBASE, 0); + + // Extract upper 4-bytes from data + getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02); + + // 4-byte write + getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, REG_SPBASE, 8); +} + +#endif // _TARGET_X86_ + //----------------------------------------------------------------------------- // genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD32 vector to // the given register, if any, or to memory. @@ -2139,5 +2333,5 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) } #endif // FEATURE_SIMD -#endif //_TARGET_AMD64_ +#endif //_TARGET_XARCH_ #endif // !LEGACY_BACKEND diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h index a44fb9d0a1..c81f7b4bf0 100644 --- a/src/jit/simdintrinsiclist.h +++ b/src/jit/simdintrinsiclist.h @@ -20,7 +20,7 @@ e) TODO-Cleanup: when we plumb TYP_SIMD through front-end, replace TYP_STRUCT with TYP_SIMD. */ -#ifdef _TARGET_AMD64_ +#ifdef _TARGET_XARCH_ // Max number of parameters that we model in the table for SIMD intrinsic methods. #define SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT 3 @@ -111,7 +111,8 @@ SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr, SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Dot Product -SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) +// Is supported only on Vector<int> on AVX. 
+SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) // Select SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) @@ -137,9 +138,9 @@ SIMD_INTRINSIC("UpperRestore", false, UpperRestore, SIMD_INTRINSIC(nullptr, false, Invalid, "Invalid", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) #undef SIMD_INTRINSIC -#else //_TARGET_AMD64_ +#else //_TARGET_XARCH_ #error SIMD intrinsics not defined for target arch -#endif //!_TARGET_AMD64_ +#endif //!_TARGET_XARCH_ #endif //FEATURE_SIMD // clang-format on diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp index 2da6902464..f0ee461c45 100644 --- a/src/jit/ssabuilder.cpp +++ b/src/jit/ssabuilder.cpp @@ -27,87 +27,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX namespace { /** - * Visits basic blocks in the depth first order and arranges them in the order of - * their DFS finish time. - * - * @param block The fgFirstBB or entry block. - * @param comp A pointer to compiler. - * @param visited In pointer initialized to false and of size at least fgMaxBBNum. - * @param count Out pointer for count of all nodes reachable by DFS. - * @param postOrder Out poitner to arrange the blocks and of size at least fgMaxBBNum. - */ -static void TopologicalSortHelper(BasicBlock* block, Compiler* comp, bool* visited, int* count, BasicBlock** postOrder) -{ - visited[block->bbNum] = true; - - ArrayStack<BasicBlock*> blocks(comp); - ArrayStack<AllSuccessorIter> iterators(comp); - ArrayStack<AllSuccessorIter> ends(comp); - - // there are three stacks used here and all should be same height - // the first is for blocks - // the second is the iterator to keep track of what succ of the block we are looking at - // and the third is the end marker iterator - blocks.Push(block); - iterators.Push(block->GetAllSuccs(comp).begin()); - ends.Push(block->GetAllSuccs(comp).end()); - - while (blocks.Height() > 0) - { - block = blocks.Top(); - -#ifdef DEBUG - if (comp->verboseSsa) - { - printf("[SsaBuilder::TopologicalSortHelper] Visiting BB%02u: ", block->bbNum); - printf("["); - unsigned numSucc = block->NumSucc(comp); - for (unsigned i = 0; i < numSucc; ++i) - { - printf("BB%02u, ", block->GetSucc(i, comp)->bbNum); - } - EHSuccessorIter end = block->GetEHSuccs(comp).end(); - for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi) - { - printf("[EH]BB%02u, ", (*ehsi)->bbNum); - } - printf("]\n"); - } -#endif - - if (iterators.TopRef() != ends.TopRef()) - { - // if the block on TOS still has unreached successors, visit them - AllSuccessorIter& iter = iterators.TopRef(); - BasicBlock* succ = *iter; - ++iter; - // push the child - - if (!visited[succ->bbNum]) - { - blocks.Push(succ); - iterators.Push(succ->GetAllSuccs(comp).begin()); - ends.Push(succ->GetAllSuccs(comp).end()); - visited[succ->bbNum] = true; - } - } - else - { - // all successors have been visited - blocks.Pop(); - iterators.Pop(); - ends.Pop(); - - postOrder[*count] = block; - block->bbPostOrderNum = *count; - *count += 1; - - DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", *count, dspPtr(block), block->bbNum); - } 
- } -} - -/** * Method that finds a common IDom parent, much like least common ancestor. * * @param finger1 A basic block that might share IDom ancestor with finger2. @@ -184,6 +103,8 @@ void Compiler::fgResetForSsa() { lvaTable[i].lvPerSsaData.Reset(); } + lvHeapPerSsaData.Reset(); + m_heapSsaMap = nullptr; for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext) { // Eliminate phis. @@ -197,6 +118,32 @@ void Compiler::fgResetForSsa() blk->bbTreeList->gtPrev = last; } } + + // Clear post-order numbers and SSA numbers; SSA construction will overwrite these, + // but only for reachable code, so clear them to avoid analysis getting confused + // by stale annotations in unreachable code. + blk->bbPostOrderNum = 0; + for (GenTreeStmt* stmt = blk->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt()) + { + for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext) + { + if (tree->IsLocal()) + { + tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM); + continue; + } + + Compiler::IndirectAssignmentAnnotation* pIndirAssign = nullptr; + if ((tree->OperGet() != GT_ASG) || !GetIndirAssignMap()->Lookup(tree, &pIndirAssign) || + (pIndirAssign == nullptr)) + { + continue; + } + + pIndirAssign->m_defSsaNum = SsaConfig::RESERVED_SSA_NUM; + pIndirAssign->m_useSsaNum = SsaConfig::RESERVED_SSA_NUM; + } + } } } @@ -222,27 +169,97 @@ SsaBuilder::SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator) { } -/** - * Topologically sort the graph and return the number of nodes visited. - * - * @param postOrder The array in which the arranged basic blocks have to be returned. - * @param count The size of the postOrder array. - * - * @return The number of nodes visited while performing DFS on the graph. - */ +//------------------------------------------------------------------------ +// TopologicalSort: Topologically sort the graph and return the number of nodes visited. +// +// Arguments: +// postOrder - The array in which the arranged basic blocks have to be returned. +// count - The size of the postOrder array. +// +// Return Value: +// The number of nodes visited while performing DFS on the graph. + int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) { - // Allocate and initialize visited flags. - bool* visited = (bool*)alloca(count * sizeof(bool)); - memset(visited, 0, count * sizeof(bool)); + Compiler* comp = m_pCompiler; + + BitVecTraits traits(comp->fgBBNumMax + 1, comp); + BitVec BITVEC_INIT_NOCOPY(visited, BitVecOps::MakeEmpty(&traits)); // Display basic blocks. - DBEXEC(VERBOSE, m_pCompiler->fgDispBasicBlocks()); - DBEXEC(VERBOSE, m_pCompiler->fgDispHandlerTab()); + DBEXEC(VERBOSE, comp->fgDispBasicBlocks()); + DBEXEC(VERBOSE, comp->fgDispHandlerTab()); - // Call the recursive helper. - int postIndex = 0; - TopologicalSortHelper(m_pCompiler->fgFirstBB, m_pCompiler, visited, &postIndex, postOrder); + // Compute order. 
+ int postIndex = 0; + BasicBlock* block = comp->fgFirstBB; + BitVecOps::AddElemD(&traits, visited, block->bbNum); + + ArrayStack<BasicBlock*> blocks(comp); + ArrayStack<AllSuccessorIter> iterators(comp); + ArrayStack<AllSuccessorIter> ends(comp); + + // there are three stacks used here and all should be same height + // the first is for blocks + // the second is the iterator to keep track of what succ of the block we are looking at + // and the third is the end marker iterator + blocks.Push(block); + iterators.Push(block->GetAllSuccs(comp).begin()); + ends.Push(block->GetAllSuccs(comp).end()); + + while (blocks.Height() > 0) + { + block = blocks.Top(); + +#ifdef DEBUG + if (comp->verboseSsa) + { + printf("[SsaBuilder::TopologicalSort] Visiting BB%02u: ", block->bbNum); + printf("["); + unsigned numSucc = block->NumSucc(comp); + for (unsigned i = 0; i < numSucc; ++i) + { + printf("BB%02u, ", block->GetSucc(i, comp)->bbNum); + } + EHSuccessorIter end = block->GetEHSuccs(comp).end(); + for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi) + { + printf("[EH]BB%02u, ", (*ehsi)->bbNum); + } + printf("]\n"); + } +#endif + + if (iterators.TopRef() != ends.TopRef()) + { + // if the block on TOS still has unreached successors, visit them + AllSuccessorIter& iter = iterators.TopRef(); + BasicBlock* succ = *iter; + ++iter; + + // push the children + if (!BitVecOps::IsMember(&traits, visited, succ->bbNum)) + { + blocks.Push(succ); + iterators.Push(succ->GetAllSuccs(comp).begin()); + ends.Push(succ->GetAllSuccs(comp).end()); + BitVecOps::AddElemD(&traits, visited, succ->bbNum); + } + } + else + { + // all successors have been visited + blocks.Pop(); + iterators.Pop(); + ends.Pop(); + + postOrder[postIndex] = block; + block->bbPostOrderNum = postIndex; + postIndex += 1; + + DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", postIndex, dspPtr(block), block->bbNum); + } + } // In the absence of EH (because catch/finally have no preds), this should be valid. // assert(postIndex == (count - 1)); @@ -1686,7 +1703,17 @@ void SsaBuilder::Build() JITDUMP("[SsaBuilder] Max block count is %d.\n", blockCount); // Allocate the postOrder array for the graph. - BasicBlock** postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*)); + + BasicBlock** postOrder; + + if (blockCount > DEFAULT_MIN_OPTS_BB_COUNT) + { + postOrder = new (m_pCompiler->getAllocator()) BasicBlock*[blockCount]; + } + else + { + postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*)); + } // Topologically sort the graph. int count = TopologicalSort(postOrder, blockCount); diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp index f975822740..43c463039e 100644 --- a/src/jit/stackfp.cpp +++ b/src/jit/stackfp.cpp @@ -1406,8 +1406,6 @@ void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree) assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH)); #endif -#ifdef DEBUGGING_SUPPORT - /* For non-debuggable code, every definition of a lcl-var has * to be checked to see if we need to open a new scope for it. 
*/ @@ -1416,7 +1414,6 @@ void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree) { siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs); } -#endif } assert(op2); @@ -2827,7 +2824,7 @@ void CodeGen::genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, Basic BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget) { // Fast paths where a transition block is not necessary - if (pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87) || pState->IsEmpty()) + if ((pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87)) || pState->IsEmpty()) { return pTarget; } @@ -4143,8 +4140,26 @@ void Compiler::raEnregisterVarsPostPassStackFP() { raSetRegLclBirthDeath(tree, lastlife, false); } + + // Model implicit use (& hence last use) of frame list root at pinvokes. + if (tree->gtOper == GT_CALL) + { + GenTreeCall* call = tree->AsCall(); + if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers()) + { + LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot]; + + if (frameVarDsc->lvTracked && ((call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH) != 0)) + { + // Frame var dies here + unsigned varIndex = frameVarDsc->lvVarIndex; + VarSetOps::RemoveElemD(this, lastlife, varIndex); + } + } + } } } + assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut)); } compCurBB = NULL; diff --git a/src/jit/standalone/CMakeLists.txt b/src/jit/standalone/CMakeLists.txt index 2e6317098e..f20d3790c7 100644 --- a/src/jit/standalone/CMakeLists.txt +++ b/src/jit/standalone/CMakeLists.txt @@ -1,22 +1,27 @@ project(ryujit) + add_definitions(-DFEATURE_NO_HOST) add_definitions(-DSELF_NO_HOST) add_definitions(-DFEATURE_READYTORUN_COMPILER) remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE) -if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM) +if(CLR_CMAKE_TARGET_ARCH_ARM) add_definitions(-DLEGACY_BACKEND) endif() -add_library_clr(${JIT_BASE_NAME} +if(WIN32) + add_definitions(-DFX_VER_INTERNALNAME_STR=clrjit.dll) +endif(WIN32) + +add_library_clr(clrjit SHARED ${SHARED_LIB_SOURCES} ) -add_dependencies(${JIT_BASE_NAME} jit_exports) +add_dependencies(clrjit jit_exports) -set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) -set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) +set_property(TARGET clrjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION}) +set_property(TARGET clrjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE}) set(RYUJIT_LINK_LIBRARIES utilcodestaticnohost @@ -47,12 +52,12 @@ else() ) endif(CLR_CMAKE_PLATFORM_UNIX) -target_link_libraries(${JIT_BASE_NAME} +target_link_libraries(clrjit ${RYUJIT_LINK_LIBRARIES} ) # add the install targets -install_clr(${JIT_BASE_NAME}) +install_clr(clrjit) # Enable profile guided optimization -add_pgo(${JIT_BASE_NAME}) +add_pgo(clrjit) diff --git a/src/jit/target.h b/src/jit/target.h index fa0b18af3e..a726525488 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -6,11 +6,6 @@ #ifndef _TARGET_H_ #define _TARGET_H_ -// Inform includers that we're in a context in which a target has been set. -#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_) -#define _TARGET_SET_ -#endif - // If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined. 
#if defined(UNIX_AMD64_ABI) #if !defined(_TARGET_AMD64_) @@ -365,6 +360,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #endif // !LEGACY_BACKEND +#ifdef FEATURE_SIMD + #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned +#endif // FEATURE_SIMD #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args @@ -585,7 +583,14 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_CALLEE_TRASH_NOGC RBM_EDX #endif // NOGC_WRITE_BARRIERS - // IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM) + // GenericPInvokeCalliHelper unmanaged target parameter + #define REG_PINVOKE_TARGET_PARAM REG_EAX + #define RBM_PINVOKE_TARGET_PARAM RBM_EAX + + // GenericPInvokeCalliHelper cookie parameter + #define REG_PINVOKE_COOKIE_PARAM REG_STK + + // IL stub's secret parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_EAX #define RBM_SECRET_STUB_PARAM RBM_EAX @@ -594,6 +599,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_VIRTUAL_STUB_PARAM RBM_EAX #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_EAX + // VSD target address register + #define REG_VIRTUAL_STUB_TARGET REG_EAX + #define RBM_VIRTUAL_STUB_TARGET RBM_EAX + // Registers used by PInvoke frame setup #define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper #define RBM_PINVOKE_FRAME RBM_EDI @@ -670,6 +679,12 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1) + // The registers trashed by profiler enter/leave/tailcall hook + // See vm\i386\asmhelpers.asm for more details. + #define RBM_PROFILER_ENTER_TRASH RBM_NONE + #define RBM_PROFILER_LEAVE_TRASH RBM_NONE + #define RBM_PROFILER_TAILCALL_TRASH (RBM_ALLINT & ~RBM_ARG_REGS) + // What sort of reloc do we use for [disp32] address mode #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_HIGHLOW @@ -968,7 +983,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_PINVOKE_TARGET_PARAM RBM_R10 #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R10 - // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM) + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R10 #define RBM_SECRET_STUB_PARAM RBM_R10 @@ -1111,9 +1126,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #endif // !UNIX_AMD64_ABI // The registers trashed by profiler enter/leave/tailcall hook - // See vm\amd64\amshelpers.asm for more details. - #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH - #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET)) + // See vm\amd64\asmhelpers.asm for more details. + #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH + #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET)) + #define RBM_PROFILER_TAILCALL_TRASH RBM_PROFILER_LEAVE_TRASH // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper. 
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING @@ -1339,7 +1355,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_PINVOKE_TARGET_PARAM RBM_R12 #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R12 - // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM) + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R12 #define RBM_SECRET_STUB_PARAM RBM_R12 @@ -1447,6 +1463,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define JMP_DIST_SMALL_MAX_NEG (-2048) #define JMP_DIST_SMALL_MAX_POS (+2046) + #define CALL_DIST_MAX_NEG (-16777216) + #define CALL_DIST_MAX_POS (+16777214) + #define JCC_DIST_SMALL_MAX_NEG (-256) #define JCC_DIST_SMALL_MAX_POS (+254) @@ -1617,7 +1636,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define RBM_PINVOKE_TARGET_PARAM RBM_R14 #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R14 - // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM) + // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R12 #define RBM_SECRET_STUB_PARAM RBM_R12 @@ -2277,6 +2296,9 @@ inline regNumber regNextOfType(regNumber reg, var_types type) inline bool isRegPairType(int /* s/b "var_types" */ type) { +#if !CPU_LONG_USES_REGPAIR + return false; +#else #ifdef _TARGET_64BIT_ return false; #elif CPU_HAS_FP_SUPPORT @@ -2284,6 +2306,7 @@ inline bool isRegPairType(int /* s/b "var_types" */ type) #else return type == TYP_LONG || type == TYP_DOUBLE; #endif +#endif // CPU_LONG_USES_REGPAIR } inline bool isFloatRegType(int /* s/b "var_types" */ type) diff --git a/src/jit/tinyarray.h b/src/jit/tinyarray.h index 17d7e044b2..bee59bdb59 100644 --- a/src/jit/tinyarray.h +++ b/src/jit/tinyarray.h @@ -71,7 +71,7 @@ public: // only use this for clearing it void operator=(void* rhs) { - assert(rhs == NULL); + assert(rhs == nullptr); data = 0; } }; diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp index 89abdff2b3..14eba8cb50 100644 --- a/src/jit/unwindamd64.cpp +++ b/src/jit/unwindamd64.cpp @@ -481,6 +481,13 @@ void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset) } #ifdef UNIX_AMD64_ABI +//------------------------------------------------------------------------ +// Compiler::unwindSetFrameRegCFI: Record a cfi info for a frame register set. +// +// Arguments: +// reg - The register being set as the frame register. +// offset - The offset from the current stack pointer that the frame pointer will point at. 
+// void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset) { assert(compGeneratingProlog); @@ -492,7 +499,13 @@ void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset) createCfiCode(func, cbProlog, CFI_DEF_CFA_REGISTER, mapRegNumToDwarfReg(reg)); if (offset != 0) { - createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, offset); + // before: cfa = rsp + old_cfa_offset; + // rbp = rsp + offset; + // after: cfa should be based on rbp, but points to the old address: + // rsp + old_cfa_offset == rbp + old_cfa_offset + adjust; + // adjust = -offset; + int adjust = -(int)offset; + createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, adjust); } } #endif // UNIX_AMD64_ABI diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp index 9934416412..3a45039aa7 100644 --- a/src/jit/utils.cpp +++ b/src/jit/utils.cpp @@ -657,7 +657,7 @@ void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes for (IL_OFFSET offs = 0; offs < codeSize;) { char prefix[100]; - sprintf(prefix, "IL_%04x ", offs); + sprintf_s(prefix, _countof(prefix), "IL_%04x ", offs); unsigned codeBytesDumped = dumpSingleInstr(codeAddr, offs, prefix); offs += codeBytesDumped; } @@ -665,11 +665,9 @@ void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes /***************************************************************************** * - * Display a variable set (which may be a 32-bit or 64-bit number); only - * one or two of these can be used at once. + * Display a variable set. */ - -const char* genES2str(EXPSET_TP set) +const char* genES2str(BitVecTraits* traits, EXPSET_TP set) { const int bufSize = 17; static char num1[bufSize]; @@ -682,11 +680,7 @@ const char* genES2str(EXPSET_TP set) nump = (nump == num1) ? num2 : num1; -#if EXPSET_SZ == 32 - sprintf_s(temp, bufSize, "%08X", set); -#else - sprintf_s(temp, bufSize, "%08X%08X", (int)(set >> 32), (int)set); -#endif + sprintf_s(temp, bufSize, "%s", BitVecOps::ToString(traits, set)); return temp; } @@ -876,7 +870,7 @@ void ConfigMethodRange::InitRanges(const wchar_t* rangeStr, unsigned capacity) #endif // defined(DEBUG) || defined(INLINE_DATA) -#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE +#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE || MEASURE_MEM_ALLOC /***************************************************************************** * Histogram class. 
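An aside on the unwindSetFrameRegCFI hunk in unwindamd64.cpp above: the new comment derives the CFA correction as adjust = -offset once the prolog has established rbp = rsp + offset. The stand-alone C++ sketch below (not CoreCLR code; the register values and offsets are invented for illustration) simply replays that arithmetic to show the canonical frame address resolves to the same location before and after the CFA base register switches from rsp to rbp.

// Minimal sketch of the CFA bookkeeping described in unwindSetFrameRegCFI.
// All concrete values are made up; only the relationships matter.
#include <cassert>

int main()
{
    // Before the frame register is set: cfa = rsp + old_cfa_offset.
    long long rsp            = 0x7fff0000;
    long long old_cfa_offset = 16;
    long long cfa            = rsp + old_cfa_offset;

    // The prolog establishes the frame pointer: rbp = rsp + offset.
    long long offset = 32;
    long long rbp    = rsp + offset;

    // CFI_DEF_CFA_REGISTER switches the CFA base to rbp; to keep the CFA
    // pointing at the same address, CFI_ADJUST_CFA_OFFSET must add
    // adjust = -offset to the running CFA offset, as in the patch.
    long long adjust         = -offset;
    long long new_cfa_offset = old_cfa_offset + adjust;

    assert(rbp + new_cfa_offset == cfa); // same canonical frame address
    return 0;
}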
@@ -896,7 +890,10 @@ Histogram::Histogram(IAllocator* allocator, const unsigned* const sizeTable) Histogram::~Histogram() { - m_allocator->Free(m_counts); + if (m_counts != nullptr) + { + m_allocator->Free(m_counts); + } } // We need to lazy allocate the histogram data so static `Histogram` variables don't try to @@ -1414,6 +1411,9 @@ void HelperCallProperties::init() case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE: case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE: case CORINFO_HELP_READYTORUN_STATIC_BASE: +#if COR_JIT_EE_VERSION > 460 + case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE: +#endif // COR_JIT_EE_VERSION > 460 // These may invoke static class constructors // These can throw InvalidProgram exception if the class can not be constructed diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp index 5bc96ed4a9..f7cc0c9a23 100644 --- a/src/jit/valuenum.cpp +++ b/src/jit/valuenum.cpp @@ -76,7 +76,6 @@ ValueNumStore::ValueNumStore(Compiler* comp, IAllocator* alloc) , m_VNFunc2Map(nullptr) , m_VNFunc3Map(nullptr) , m_VNFunc4Map(nullptr) - , m_uPtrToLocNotAFieldCount(1) { // We have no current allocation chunks. for (unsigned i = 0; i < TYP_COUNT; i++) @@ -604,6 +603,7 @@ ValueNumStore::Chunk::Chunk( switch (attribs) { case CEA_None: + case CEA_NotAField: break; // Nothing to do. case CEA_Const: switch (typ) @@ -911,6 +911,7 @@ class Object* ValueNumStore::s_specialRefConsts[] = {nullptr, nullptr, nullptr}; ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func) { assert(VNFuncArity(func) == 0); + assert(func != VNF_NotAField); ValueNum res; @@ -1029,9 +1030,9 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V { if (typ != TYP_BYREF) // We don't want/need to optimize a zero byref { - genTreeOps oper = genTreeOps(func); - ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below. - switch (oper) + ValueNum resultVN = NoVN; + ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below. 
+ switch (genTreeOps(func)) { case GT_ADD: // This identity does not apply for floating point (when x == -0.0) @@ -1041,11 +1042,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { - return arg1VN; + resultVN = arg1VN; } else if (arg1VN == ZeroVN) { - return arg0VN; + resultVN = arg0VN; } } break; @@ -1055,7 +1056,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg1VN == ZeroVN) { - return arg0VN; + resultVN = arg0VN; } break; @@ -1066,11 +1067,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V { if (arg0VN == OneVN) { - return arg1VN; + resultVN = arg1VN; } else if (arg1VN == OneVN) { - return arg0VN; + resultVN = arg0VN; } } @@ -1080,11 +1081,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { - return ZeroVN; + resultVN = ZeroVN; } else if (arg1VN == ZeroVN) { - return ZeroVN; + resultVN = ZeroVN; } } break; @@ -1097,7 +1098,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V { if (arg1VN == OneVN) { - return arg0VN; + resultVN = arg0VN; } } break; @@ -1109,11 +1110,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { - return arg1VN; + resultVN = arg1VN; } else if (arg1VN == ZeroVN) { - return arg0VN; + resultVN = arg0VN; } break; @@ -1122,11 +1123,11 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg0VN == ZeroVN) { - return ZeroVN; + resultVN = ZeroVN; } else if (arg1VN == ZeroVN) { - return ZeroVN; + resultVN = ZeroVN; } break; @@ -1142,7 +1143,7 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V ZeroVN = VNZeroForType(typ); if (arg1VN == ZeroVN) { - return arg0VN; + resultVN = arg0VN; } break; @@ -1150,30 +1151,35 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V // (x == x) => true (unless x is NaN) if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN)) { - return VNOneForType(typ); + resultVN = VNOneForType(typ); } if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) || (arg1VN == VNForNull() && IsKnownNonNull(arg0VN))) { - return VNZeroForType(typ); + resultVN = VNZeroForType(typ); } break; case GT_NE: // (x != x) => false (unless x is NaN) if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN)) { - return VNZeroForType(typ); + resultVN = VNZeroForType(typ); } if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) || (arg1VN == VNForNull() && IsKnownNonNull(arg0VN))) { - return VNOneForType(typ); + resultVN = VNOneForType(typ); } break; default: break; } + + if ((resultVN != NoVN) && (TypeOfVN(resultVN) == typ)) + { + return resultVN; + } } } else // must be a VNF_ function @@ -2072,10 +2078,11 @@ bool ValueNumStore::CanEvalForConstantArgs(VNFunc vnf) case GT_MKREFANY: // We can't evaluate these. 
case GT_RETFILT: case GT_LIST: + case GT_FIELD_LIST: case GT_ARR_LENGTH: return false; case GT_MULHI: - // should be rare, not worth the complexity and risk of getting it wrong + assert(false && "Unexpected GT_MULHI node encountered before lowering"); return false; default: return true; @@ -2545,6 +2552,11 @@ ValueNumPair ValueNumStore::VNPairApplySelectors(ValueNumPair map, FieldSeqNode* return ValueNumPair(liberalVN, conservVN); } +bool ValueNumStore::IsVNNotAField(ValueNum vn) +{ + return m_chunks.GetNoExpand(GetChunkNum(vn))->m_attribs == CEA_NotAField; +} + ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq) { if (fieldSeq == nullptr) @@ -2553,7 +2565,11 @@ ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq) } else if (fieldSeq == FieldSeqStore::NotAField()) { - return VNForNotAField(); + // We always allocate a new, unique VN in this call. + Chunk* c = GetAllocChunk(TYP_REF, CEA_NotAField); + unsigned offsetWithinChunk = c->AllocVN(); + ValueNum result = c->m_baseVN + offsetWithinChunk; + return result; } else { @@ -2585,22 +2601,22 @@ FieldSeqNode* ValueNumStore::FieldSeqVNToFieldSeq(ValueNum vn) { return nullptr; } - else if (vn == VNForNotAField()) + + assert(IsVNFunc(vn)); + + VNFuncApp funcApp; + GetVNFunc(vn, &funcApp); + if (funcApp.m_func == VNF_NotAField) { return FieldSeqStore::NotAField(); } - else - { - assert(IsVNFunc(vn)); - VNFuncApp funcApp; - GetVNFunc(vn, &funcApp); - assert(funcApp.m_func == VNF_FieldSeq); - ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]); - FieldSeqNode* head = - m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal)); - FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]); - return m_pComp->GetFieldSeqStore()->Append(head, tail); - } + + assert(funcApp.m_func == VNF_FieldSeq); + const ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]); + FieldSeqNode* head = + m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal)); + FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]); + return m_pComp->GetFieldSeqStore()->Append(head, tail); } ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2) @@ -2609,40 +2625,31 @@ ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2) { return fsVN2; } - else if (fsVN1 == VNForNotAField() || fsVN2 == VNForNotAField()) - { - return VNForNotAField(); - } - else - { - assert(IsVNFunc(fsVN1)); - VNFuncApp funcApp1; - GetVNFunc(fsVN1, &funcApp1); - assert(funcApp1.m_func == VNF_FieldSeq); - ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2); - ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes); -#ifdef DEBUG - if (m_pComp->verbose) - { - printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN); - vnDump(m_pComp, fieldSeqVN); - printf("\n"); - } -#endif + assert(IsVNFunc(fsVN1)); - return fieldSeqVN; + VNFuncApp funcApp1; + GetVNFunc(fsVN1, &funcApp1); + + if ((funcApp1.m_func == VNF_NotAField) || IsVNNotAField(fsVN2)) + { + return VNForFieldSeq(FieldSeqStore::NotAField()); } -} -ValueNum ValueNumStore::VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN) -{ - if (fieldSeqVN == VNForNotAField()) + assert(funcApp1.m_func == VNF_FieldSeq); + ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2); + ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes); + +#ifdef DEBUG + if (m_pComp->verbose) { - // To distinguish two different not a fields, append a unique 
value. - return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(++m_uPtrToLocNotAFieldCount)); + printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN); + vnDump(m_pComp, fieldSeqVN); + printf("\n"); } - return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(0)); +#endif + + return fieldSeqVN; } ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB) @@ -2650,7 +2657,7 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB) if (opB->OperGet() == GT_CNS_INT) { FieldSeqNode* fldSeq = opB->gtIntCon.gtFieldSeq; - if ((fldSeq != nullptr) && (fldSeq != FieldSeqStore::NotAField())) + if (fldSeq != nullptr) { return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq); } @@ -2660,8 +2667,9 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB) ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq) { + assert(fldSeq != nullptr); + ValueNum res = NoVN; - assert(fldSeq != FieldSeqStore::NotAField()); ValueNum opAvnWx = opA->gtVNPair.GetLiberal(); assert(VNIsValid(opAvnWx)); @@ -2684,7 +2692,7 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq) assert(GetVNFunc(VNNormVal(opA->GetVN(VNK_Conservative)), &consFuncApp) && consFuncApp.Equals(funcApp)); #endif ValueNum fldSeqVN = VNForFieldSeq(fldSeq); - res = VNForPtrToLoc(TYP_BYREF, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN)); + res = VNForFunc(TYP_BYREF, VNF_PtrToLoc, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN)); } else if (funcApp.m_func == VNF_PtrToStatic) { @@ -2917,6 +2925,11 @@ ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree, var_types ValueNumStore::TypeOfVN(ValueNum vn) { + if (vn == NoVN) + { + return TYP_UNDEF; + } + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); return c->m_typ; } @@ -2936,6 +2949,11 @@ var_types ValueNumStore::TypeOfVN(ValueNum vn) BasicBlock::loopNumber ValueNumStore::LoopOfVN(ValueNum vn) { + if (vn == NoVN) + { + return MAX_LOOP_NUM; + } + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); return c->m_loopNum; } @@ -3388,6 +3406,7 @@ bool ValueNumStore::IsVNFunc(ValueNum vn) Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); switch (c->m_attribs) { + case CEA_NotAField: case CEA_Func0: case CEA_Func1: case CEA_Func2: @@ -3401,6 +3420,11 @@ bool ValueNumStore::IsVNFunc(ValueNum vn) bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp) { + if (vn == NoVN) + { + return false; + } + Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn)); unsigned offset = ChunkOffset(vn); assert(offset < c->m_numUsed); @@ -3415,8 +3439,8 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp) funcApp->m_args[1] = farg4->m_arg1; funcApp->m_args[2] = farg4->m_arg2; funcApp->m_args[3] = farg4->m_arg3; - } return true; + } case CEA_Func3: { VNDefFunc3Arg* farg3 = &reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offset]; @@ -3425,8 +3449,8 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp) funcApp->m_args[0] = farg3->m_arg0; funcApp->m_args[1] = farg3->m_arg1; funcApp->m_args[2] = farg3->m_arg2; - } return true; + } case CEA_Func2: { VNDefFunc2Arg* farg2 = &reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offset]; @@ -3434,23 +3458,29 @@ bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp) funcApp->m_arity = 2; funcApp->m_args[0] = farg2->m_arg0; funcApp->m_args[1] = farg2->m_arg1; - } return true; + } case CEA_Func1: { VNDefFunc1Arg* farg1 = &reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offset]; funcApp->m_func = farg1->m_func; funcApp->m_arity = 1; 
funcApp->m_args[0] = farg1->m_arg0; - } return true; + } case CEA_Func0: { VNDefFunc0Arg* farg0 = &reinterpret_cast<VNDefFunc0Arg*>(c->m_defs)[offset]; funcApp->m_func = farg0->m_func; funcApp->m_arity = 0; + return true; } + case CEA_NotAField: + { + funcApp->m_func = VNF_NotAField; + funcApp->m_arity = 0; return true; + } default: return false; } @@ -3751,8 +3781,9 @@ static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memo // These need special semantics: GT_COMMA, // == second argument (but with exception(s) from first). GT_ADDR, GT_ARR_BOUNDS_CHECK, - GT_OBJ, // May reference heap memory. - GT_BLK, // May reference heap memory. + GT_OBJ, // May reference heap memory. + GT_BLK, // May reference heap memory. + GT_INIT_VAL, // Not strictly a pass-through. // These control-flow operations need no values. GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE}; @@ -3842,10 +3873,9 @@ static const char* s_reservedNameArr[] = { "$VN.No", // -1 NoVN "$VN.Null", // 0 VNForNull() "$VN.ZeroMap", // 1 VNForZeroMap() - "$VN.NotAField", // 2 VNForNotAField() - "$VN.ReadOnlyHeap", // 3 VNForROH() - "$VN.Void", // 4 VNForVoid() - "$VN.EmptyExcSet" // 5 VNForEmptyExcSet() + "$VN.ReadOnlyHeap", // 2 VNForROH() + "$VN.Void", // 3 VNForVoid() + "$VN.EmptyExcSet" // 4 VNForEmptyExcSet() }; // Returns the string name of "vn" when it is a reserved value number, nullptr otherwise @@ -4804,8 +4834,16 @@ void Compiler::fgValueNumberTreeConst(GenTreePtr tree) tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->gtDblCon.gtDconVal)); break; case TYP_REF: - // Null is the only constant. (Except maybe for String?) - tree->gtVNPair.SetBoth(ValueNumStore::VNForNull()); + if (tree->gtIntConCommon.IconValue() == 0) + { + tree->gtVNPair.SetBoth(ValueNumStore::VNForNull()); + } + else + { + assert(tree->gtFlags == GTF_ICON_STR_HDL); // Constant object can be only frozen string. + tree->gtVNPair.SetBoth( + vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag())); + } break; case TYP_BYREF: @@ -4903,9 +4941,6 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd) } #endif // DEBUG } - // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we - // want to be able to give VN's to. - tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); } else { @@ -4913,6 +4948,9 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd) // TODO-CQ: Why not be complete, and get this case right? fgMutateHeap(tree DEBUGARG("INITBLK - non local")); } + // Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we + // want to be able to give VN's to. + tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); } else { @@ -4953,17 +4991,21 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd) assert(lhs->OperGet() == GT_IND); lhsAddr = lhs->gtOp.gtOp1; } + // For addr-of-local expressions, lib/cons shouldn't matter. assert(lhsAddr->gtVNPair.BothEqual()); ValueNum lhsAddrVN = lhsAddr->GetVN(VNK_Liberal); // Unpack the PtrToLoc value number of the address. 
assert(vnStore->IsVNFunc(lhsAddrVN)); + VNFuncApp lhsAddrFuncApp; vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp); + assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc); assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) && vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum); + lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]); } @@ -5598,10 +5640,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) // (we looked in a side table above for its "def" identity). Look up that value. ValueNumPair oldLhsVNPair = lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair; - newLhsVNPair = - vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq, - rhsVNPair, // Pre-value. - lvaGetActualType(lclFld->gtLclNum), compCurBB); + newLhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq, + rhsVNPair, // Pre-value. + lclFld->TypeGet(), compCurBB); } } lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair; @@ -6034,8 +6075,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) if (newVN == ValueNumStore::NoVN) { assert(arg->gtLclVarCommon.GetSsaNum() != ValueNumStore::NoVN); - newVN = vnStore->VNForPtrToLoc(TYP_BYREF, vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()), - vnStore->VNForFieldSeq(fieldSeq)); + newVN = vnStore->VNForFunc(TYP_BYREF, VNF_PtrToLoc, + vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()), + vnStore->VNForFieldSeq(fieldSeq)); } tree->gtVNPair.SetBoth(newVN); } @@ -6240,17 +6282,12 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) } tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp); } - else if (!varTypeIsStruct(tree) && vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) && - (funcApp.m_func == VNF_PtrToArrElem)) + else if (vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) && (funcApp.m_func == VNF_PtrToArrElem)) { - // TODO-1stClassStructs: The above condition need not exclude struct types, but it is - // excluded for now to minimize diffs. fgValueNumberArrIndexVal(tree, &funcApp, addrXvnp.GetLiberal()); } - else if (!varTypeIsStruct(tree) && addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2)) + else if (addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2)) { - // TODO-1stClassStructs: The above condition need not exclude struct types, but it is - // excluded for now to minimize diffs. 
if (fldSeq2 == FieldSeqStore::NotAField()) { tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); @@ -6522,6 +6559,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd) case GT_JTRUE: case GT_LIST: +#ifndef LEGACY_BACKEND + case GT_FIELD_LIST: +#endif // !LEGACY_BACKEND // These nodes never need to have a ValueNumber tree->gtVNPair.SetBoth(ValueNumStore::NoVN); break; @@ -6667,7 +6707,7 @@ void Compiler::fgValueNumberCastTree(GenTreePtr tree) bool srcIsUnsigned = ((tree->gtFlags & GTF_UNSIGNED) != 0); bool hasOverflowCheck = tree->gtOverflowEx(); - assert(genActualType(castToType) == tree->TypeGet()); // Insure that the resultType is correct + assert(genActualType(castToType) == genActualType(tree->TypeGet())); // Insure that the resultType is correct tree->gtVNPair = vnStore->VNPairForCast(srcVNPair, castToType, castFromType, srcIsUnsigned, hasOverflowCheck); } @@ -6816,6 +6856,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN break; case VNF_ReadyToRunStaticBase: + case VNF_ReadyToRunGenericStaticBase: case VNF_ReadyToRunIsInstanceOf: case VNF_ReadyToRunCastClass: { @@ -7061,11 +7102,11 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc) vnf = VNFunc(GT_MOD); break; case CORINFO_HELP_ULDIV: - vnf = VNFunc(GT_DIV); - break; // Is this the right thing? + vnf = VNFunc(GT_UDIV); + break; case CORINFO_HELP_ULMOD: - vnf = VNFunc(GT_MOD); - break; // Is this the right thing? + vnf = VNFunc(GT_UMOD); + break; case CORINFO_HELP_LNG2DBL: vnf = VNF_Lng2Dbl; @@ -7155,6 +7196,11 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc) case CORINFO_HELP_READYTORUN_STATIC_BASE: vnf = VNF_ReadyToRunStaticBase; break; +#if COR_JIT_EE_VERSION > 460 + case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE: + vnf = VNF_ReadyToRunGenericStaticBase; + break; +#endif // COR_JIT_EE_VERSION > 460 case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS: vnf = VNF_GetsharedGcstaticBaseDynamicclass; break; diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h index 17dacfbb54..c8a57ff210 100644 --- a/src/jit/valuenum.h +++ b/src/jit/valuenum.h @@ -297,13 +297,6 @@ public: return ValueNum(SRC_ZeroMap); } - // The value number for the special "NotAField" field sequence. - static ValueNum VNForNotAField() - { - // We reserve Chunk 0 for "special" VNs. Let SRC_NotAField (== 2) be the "not a field seq". - return ValueNum(SRC_NotAField); - } - // The ROH map is the map for the "read-only heap". We assume that this is never mutated, and always // has the same value number. static ValueNum VNForROH() @@ -450,7 +443,7 @@ public: // Get a new, unique value number for an expression that we're not equating to some function, // which is the value of a tree in the given block. - ValueNum VNForExpr(BasicBlock *block, var_types typ = TYP_UNKNOWN); + ValueNum VNForExpr(BasicBlock* block, var_types typ = TYP_UNKNOWN); // This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions. 
#define FEATURE_VN_TRACE_APPLY_SELECTORS 1 @@ -485,13 +478,11 @@ public: ValueNumPair VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType); - ValueNumPair VNPairApplySelectorsAssign(ValueNumPair map, - FieldSeqNode* fieldSeq, - ValueNumPair rhs, - var_types indType, - BasicBlock* block) + ValueNumPair VNPairApplySelectorsAssign( + ValueNumPair map, FieldSeqNode* fieldSeq, ValueNumPair rhs, var_types indType, BasicBlock* block) { - return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType, block), + return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType, + block), VNApplySelectorsAssign(VNK_Conservative, map.GetConservative(), fieldSeq, rhs.GetConservative(), indType, block)); } @@ -506,6 +497,9 @@ public: bool srcIsUnsigned = false, bool hasOverflowCheck = false); + // Returns true iff the VN represents an application of VNF_NotAField. + bool IsVNNotAField(ValueNum vn); + // PtrToLoc values need to express a field sequence as one of their arguments. VN for null represents // empty sequence, otherwise, "FieldSeq(VN(FieldHandle), restOfSeq)". ValueNum VNForFieldSeq(FieldSeqNode* fieldSeq); @@ -518,12 +512,6 @@ public: // concatenation "fsVN1 || fsVN2". ValueNum FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2); - // Requires "lclVarVN" be a value number for a GT_LCL_VAR pointer tree. - // Requires "fieldSeqVN" be a field sequence value number. - // Requires "typ" to be a TYP_REF/TYP_BYREF used for VNF_PtrToLoc. - // When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount. - ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN); - // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer // with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN. ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB); @@ -853,14 +841,15 @@ private: DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE) { - CEA_None, // No extra attributes. - CEA_Const, // This chunk contains constant values. - CEA_Handle, // This chunk contains handle constants. - CEA_Func0, // Represents functions of arity 0. - CEA_Func1, // ...arity 1. - CEA_Func2, // ...arity 2. - CEA_Func3, // ...arity 3. - CEA_Func4, // ...arity 4. + CEA_None, // No extra attributes. + CEA_Const, // This chunk contains constant values. + CEA_Handle, // This chunk contains handle constants. + CEA_NotAField, // This chunk contains "not a field" values. + CEA_Func0, // Represents functions of arity 0. + CEA_Func1, // ...arity 1. + CEA_Func2, // ...arity 2. + CEA_Func3, // ...arity 3. + CEA_Func4, // ...arity 4. CEA_Count } END_DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE); @@ -883,9 +872,14 @@ private: ChunkExtraAttribs m_attribs; BasicBlock::loopNumber m_loopNum; - // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for allocations). + // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for + // allocations). // (Increments "*baseVN" by ChunkSize.) - Chunk(IAllocator* alloc, ValueNum* baseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum); + Chunk(IAllocator* alloc, + ValueNum* baseVN, + var_types typ, + ChunkExtraAttribs attribs, + BasicBlock::loopNumber loopNum); // Requires that "m_numUsed < ChunkSize." 
Returns the offset of the allocated VN within the chunk; the // actual VN is this added to the "m_baseVN" of the chunk. @@ -1257,7 +1251,6 @@ private: { SRC_Null, SRC_ZeroMap, - SRC_NotAField, SRC_ReadOnlyHeap, SRC_Void, SRC_EmptyExcSet, @@ -1265,10 +1258,6 @@ private: SRC_NumSpecialRefConsts }; - // Counter to keep track of all the unique not a field sequences that have been assigned to - // PtrToLoc, because the ptr was added to an offset that was not a field. - unsigned m_uPtrToLocNotAFieldCount; - // The "values" of special ref consts will be all be "null" -- their differing meanings will // be carried by the distinct value numbers. static class Object* s_specialRefConsts[SRC_NumSpecialRefConsts]; diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h index 064a33707b..eb17aedf28 100644 --- a/src/jit/valuenumfuncs.h +++ b/src/jit/valuenumfuncs.h @@ -11,9 +11,10 @@ ValueNumFuncDef(MapStore, 3, false, false, false) ValueNumFuncDef(MapSelect, 2, false, false, false) ValueNumFuncDef(FieldSeq, 2, false, false, false) // Sequence (VN of null == empty) of (VN's of) field handles. +ValueNumFuncDef(NotAField, 0, false, false, false) // Value number function for FieldSeqStore::NotAField. ValueNumFuncDef(ZeroMap, 0, false, false, false) // The "ZeroMap": indexing at any index yields "zero of the desired type". -ValueNumFuncDef(PtrToLoc, 3, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq, 2: Unique value for this PtrToLoc. +ValueNumFuncDef(PtrToLoc, 2, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq. ValueNumFuncDef(PtrToArrElem, 4, false, false, false) // Pointer (byref) to an array element. Args: 0: array elem type eq class var_types value, VN's of: 1: array, 2: index, 3: FieldSeq. ValueNumFuncDef(PtrToStatic, 1, false, false, false) // Pointer (byref) to a static variable (or possibly a field thereof, if the static variable is a struct). Args: 0: FieldSeq, first element // of which is the static var. @@ -99,6 +100,7 @@ ValueNumFuncDef(GetsharedNongcstaticBase, 2, false, true, true) ValueNumFuncDef(GetsharedGcstaticBaseNoctor, 1, false, true, true) ValueNumFuncDef(GetsharedNongcstaticBaseNoctor, 1, false, true, true) ValueNumFuncDef(ReadyToRunStaticBase, 1, false, true, true) +ValueNumFuncDef(ReadyToRunGenericStaticBase, 2, false, true, true) ValueNumFuncDef(GetsharedGcstaticBaseDynamicclass, 2, false, true, true) ValueNumFuncDef(GetsharedNongcstaticBaseDynamicclass, 2, false, true, true) ValueNumFuncDef(GetgenericsGcthreadstaticBase, 1, false, true, true)
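To make the intent of the NotAField changes above easier to follow (the new CEA_NotAField chunk attribute, the unique value number allocated per call in VNForFieldSeq, and PtrToLoc dropping from three arguments to two in valuenumfuncs.h): a known field sequence can safely be hash-consed so that equal sequences share one value number, but "not a field" stands for an unknown location, so two such sequences must never compare equal; the uniqueness that used to be injected through the extra m_uPtrToLocNotAFieldCount argument on PtrToLoc now lives in the NotAField value number itself. The C++ sketch below is illustrative only -- the class and names are invented, not CoreCLR's -- and models just that contract.

// Toy model (assumed names, not CoreCLR code): known field handles are
// hash-consed to a stable value number, while every "not a field" request
// allocates a fresh number so unknown locations never look identical.
#include <cassert>
#include <cstdint>
#include <unordered_map>

using ValueNum = std::uint32_t;

class TinyFieldSeqStore
{
    std::unordered_map<std::uint64_t, ValueNum> m_knownFields; // field handle -> VN
    ValueNum m_nextVN = 1;

public:
    ValueNum VNForField(std::uint64_t fieldHandle)
    {
        auto it = m_knownFields.find(fieldHandle);
        if (it != m_knownFields.end())
        {
            return it->second; // same handle, same value number
        }
        ValueNum vn = m_nextVN++;
        m_knownFields.emplace(fieldHandle, vn);
        return vn;
    }

    ValueNum VNForNotAField()
    {
        return m_nextVN++; // always brand new, never reused
    }
};

int main()
{
    TinyFieldSeqStore store;

    // A known field handle maps to one stable value number.
    assert(store.VNForField(0x1234) == store.VNForField(0x1234));

    // Two "not a field" sequences stay distinct, so value numbering cannot
    // conclude that two unrelated unknown addresses are the same location.
    assert(store.VNForNotAField() != store.VNForNotAField());
    return 0;
}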