path: root/src/jit
Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/CMakeLists.txt  166
-rw-r--r--  src/jit/DIRS.proj  4
-rw-r--r--  src/jit/ICorJitInfo_API_wrapper.hpp  48
-rwxr-xr-x  src/jit/_typeinfo.h  51
-rw-r--r--  src/jit/assertionprop.cpp  355
-rw-r--r--  src/jit/bitset.cpp  4
-rw-r--r--  src/jit/bitsetasshortlong.h  137
-rw-r--r--  src/jit/block.h  226
-rwxr-xr-x  src/jit/codegen.h  74
-rw-r--r--  src/jit/codegenarm.cpp  1728
-rw-r--r--  src/jit/codegenarm64.cpp  1824
-rw-r--r--  src/jit/codegenarmarch.cpp  1687
-rw-r--r--  src/jit/codegenclassic.h  8
-rw-r--r--  src/jit/codegencommon.cpp  265
-rw-r--r--  src/jit/codegeninterface.h  2
-rw-r--r--  src/jit/codegenlegacy.cpp  232
-rw-r--r--  src/jit/codegenlinear.cpp  72
-rw-r--r--  src/jit/codegenlinear.h  62
-rw-r--r--  src/jit/codegenxarch.cpp  345
-rw-r--r--  src/jit/compatjit/CMakeLists.txt  66
-rw-r--r--  src/jit/compiler.cpp  126
-rw-r--r--  src/jit/compiler.h  333
-rw-r--r--  src/jit/compiler.hpp  61
-rw-r--r--  src/jit/compilerbitsettraits.h  8
-rw-r--r--  src/jit/compilerbitsettraits.hpp  16
-rw-r--r--  src/jit/compphases.h  125
-rw-r--r--  src/jit/crossgen/CMakeLists.txt  2
-rw-r--r--  src/jit/decomposelongs.cpp  210
-rw-r--r--  src/jit/dll/CMakeLists.txt  20
-rw-r--r--  src/jit/ee_il_dll.cpp  185
-rw-r--r--  src/jit/ee_il_dll.hpp  4
-rw-r--r--  src/jit/emit.cpp  81
-rw-r--r--  src/jit/emit.h  53
-rw-r--r--  src/jit/emitarm.cpp  349
-rw-r--r--  src/jit/emitarm.h  17
-rw-r--r--  src/jit/emitarm64.cpp  18
-rw-r--r--  src/jit/emitarm64.h  4
-rw-r--r--  src/jit/emitinl.h  6
-rw-r--r--  src/jit/emitxarch.cpp  354
-rw-r--r--  src/jit/emitxarch.h  53
-rw-r--r--  src/jit/flowgraph.cpp  1491
-rw-r--r--  src/jit/gcencode.cpp  102
-rw-r--r--  src/jit/gentree.cpp  1379
-rw-r--r--  src/jit/gentree.h  288
-rw-r--r--  src/jit/gtlist.h  10
-rw-r--r--  src/jit/importer.cpp  1101
-rw-r--r--  src/jit/inline.def  2
-rw-r--r--  src/jit/inline.h  52
-rw-r--r--  src/jit/instr.cpp  92
-rw-r--r--  src/jit/instrsxarch.h  16
-rw-r--r--  src/jit/jit.h  60
-rw-r--r--  src/jit/jit.settings.targets  6
-rw-r--r--  src/jit/jitconfigvalues.h  50
-rw-r--r--  src/jit/jitee.h  72
-rw-r--r--  src/jit/jitgcinfo.h  6
-rw-r--r--  src/jit/jitpch.h  4
-rw-r--r--  src/jit/jitstd/type_traits.h  5
-rw-r--r--  src/jit/lclvars.cpp  596
-rw-r--r--  src/jit/legacynonjit/.gitmirror (renamed from src/jit/compatjit/.gitmirror)  0
-rw-r--r--  src/jit/legacynonjit/CMakeLists.txt (renamed from src/jit/legacyjit/CMakeLists.txt)  25
-rw-r--r--  src/jit/legacynonjit/legacynonjit.def  7
-rw-r--r--  src/jit/linuxnonjit/CMakeLists.txt  71
-rw-r--r--  src/jit/liveness.cpp  18
-rw-r--r--  src/jit/lower.cpp  205
-rw-r--r--  src/jit/lower.h  2
-rw-r--r--  src/jit/lowerarm.cpp  183
-rw-r--r--  src/jit/lowerarm64.cpp  298
-rw-r--r--  src/jit/lowerarmarch.cpp  346
-rw-r--r--  src/jit/lsra.cpp  218
-rw-r--r--  src/jit/lsra.h  7
-rw-r--r--  src/jit/lsraarm.cpp  836
-rw-r--r--  src/jit/lsraarm64.cpp  765
-rw-r--r--  src/jit/lsraarmarch.cpp  868
-rw-r--r--  src/jit/lsraxarch.cpp  221
-rw-r--r--  src/jit/morph.cpp  629
-rw-r--r--  src/jit/optcse.cpp  200
-rw-r--r--  src/jit/optimizer.cpp  22
-rw-r--r--  src/jit/protojit/CMakeLists.txt  1
-rw-r--r--  src/jit/protononjit/.gitmirror  1
-rw-r--r--  src/jit/protononjit/CMakeLists.txt  84
-rw-r--r--  src/jit/protononjit/SOURCES  10
-rw-r--r--  src/jit/protononjit/makefile  7
-rw-r--r--  src/jit/protononjit/protononjit.def  7
-rw-r--r--  src/jit/protononjit/protononjit.nativeproj  86
-rw-r--r--  src/jit/rangecheck.cpp  17
-rw-r--r--  src/jit/rationalize.cpp  29
-rw-r--r--  src/jit/regalloc.cpp  2
-rw-r--r--  src/jit/registerfp.cpp  2
-rw-r--r--  src/jit/regset.cpp  11
-rw-r--r--  src/jit/regset.h  2
-rw-r--r--  src/jit/simd.cpp  15
-rw-r--r--  src/jit/simdcodegenxarch.cpp  38
-rw-r--r--  src/jit/stackfp.cpp  2
-rw-r--r--  src/jit/standalone/CMakeLists.txt  1
-rw-r--r--  src/jit/target.h  51
-rw-r--r--  src/jit/unwind.h  24
-rw-r--r--  src/jit/unwindamd64.cpp  12
-rw-r--r--  src/jit/utils.cpp  75
-rw-r--r--  src/jit/utils.h  11
-rw-r--r--  src/jit/valuenum.cpp  375
-rw-r--r--  src/jit/valuenum.h  23
-rw-r--r--  src/jit/valuenumfuncs.h  1
102 files changed, 12145 insertions, 8376 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index db6e5973ba..e2a9ca66ab 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -4,14 +4,13 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
include_directories("./jitstd")
include_directories("../inc")
-# Enable the following for UNIX altjit on Windows
-# add_definitions(-DALT_JIT)
-
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_PLATFORM_UNIX))
add_definitions(-DFEATURE_SIMD)
add_definitions(-DFEATURE_AVX_SUPPORT)
endif ()
+# JIT_BUILD disables certain PAL_TRY debugging features
+add_definitions(-DJIT_BUILD=1)
if(WIN32)
set(JIT_RESOURCES Native.rc)
@@ -76,86 +75,99 @@ set( JIT_SOURCES
valuenum.cpp
)
-if(CLR_CMAKE_TARGET_ARCH_AMD64)
- set( ARCH_SOURCES
- codegenxarch.cpp
- emitxarch.cpp
- lowerxarch.cpp
- lsraxarch.cpp
- simd.cpp
- simdcodegenxarch.cpp
- targetamd64.cpp
- unwindamd64.cpp
- )
-elseif(CLR_CMAKE_TARGET_ARCH_ARM)
- set( ARCH_SOURCES
- codegenarm.cpp
- decomposelongs.cpp
- emitarm.cpp
- lowerarm.cpp
- lsraarm.cpp
- targetarm.cpp
- unwindarm.cpp
- )
-elseif(CLR_CMAKE_TARGET_ARCH_I386)
- set( ARCH_SOURCES
- codegenxarch.cpp
- decomposelongs.cpp
- emitxarch.cpp
- lowerxarch.cpp
- lsraxarch.cpp
- simd.cpp
- simdcodegenxarch.cpp
- targetx86.cpp
- unwindx86.cpp
- )
-elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
- set( ARCH_SOURCES
- codegenarm64.cpp
- emitarm64.cpp
- lowerarm64.cpp
- lsraarm64.cpp
- targetarm64.cpp
- unwindarm.cpp
- unwindarm64.cpp
- )
-else()
- clr_unknown_arch()
-endif()
-
# The following defines all the source files used by the "legacy" back-end (#ifdef LEGACY_BACKEND).
# It is always safe to include both legacy and non-legacy files in the build, as everything is properly
# #ifdef'ed, though it makes the build slightly slower to do so. Note there is only a legacy backend for
# x86 and ARM.
-if(CLR_CMAKE_TARGET_ARCH_AMD64)
- set( ARCH_LEGACY_SOURCES
- )
-elseif(CLR_CMAKE_TARGET_ARCH_ARM)
- set( ARCH_LEGACY_SOURCES
+set(JIT_ARM_LEGACY_SOURCES
codegenlegacy.cpp
registerfp.cpp
- )
-elseif(CLR_CMAKE_TARGET_ARCH_I386)
- set( ARCH_LEGACY_SOURCES
+)
+set(JIT_I386_LEGACY_SOURCES
codegenlegacy.cpp
stackfp.cpp
- )
+)
+
+# Define all the architecture-specific source files
+
+set( JIT_AMD64_SOURCES
+ codegenxarch.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ lsraxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetamd64.cpp
+ unwindamd64.cpp
+)
+
+set( JIT_ARM_SOURCES
+ ${JIT_ARM_LEGACY_SOURCES}
+ codegenarmarch.cpp
+ codegenarm.cpp
+ decomposelongs.cpp
+ emitarm.cpp
+ lowerarmarch.cpp
+ lowerarm.cpp
+ lsraarmarch.cpp
+ lsraarm.cpp
+ targetarm.cpp
+ unwindarm.cpp
+)
+
+set( JIT_I386_SOURCES
+ ${JIT_I386_LEGACY_SOURCES}
+ codegenxarch.cpp
+ decomposelongs.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ lsraxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetx86.cpp
+ unwindx86.cpp
+)
+
+set( JIT_ARM64_SOURCES
+ codegenarmarch.cpp
+ codegenarm64.cpp
+ emitarm64.cpp
+ lowerarmarch.cpp
+ lowerarm64.cpp
+ lsraarmarch.cpp
+ lsraarm64.cpp
+ targetarm64.cpp
+ unwindarm.cpp
+ unwindarm64.cpp
+)
+
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set(JIT_ARCH_SOURCES ${JIT_AMD64_SOURCES})
+elseif(CLR_CMAKE_TARGET_ARCH_ARM)
+ set(JIT_ARCH_SOURCES ${JIT_ARM_SOURCES})
+elseif(CLR_CMAKE_TARGET_ARCH_I386)
+ set(JIT_ARCH_SOURCES ${JIT_I386_SOURCES})
elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
- set( ARCH_LEGACY_SOURCES
- )
+ set(JIT_ARCH_SOURCES ${JIT_ARM64_SOURCES})
else()
clr_unknown_arch()
endif()
set( SOURCES
${JIT_SOURCES}
- ${ARCH_SOURCES}
- ${ARCH_LEGACY_SOURCES}
${JIT_RESOURCES}
)
convert_to_absolute_path(SOURCES ${SOURCES})
+convert_to_absolute_path(JIT_ARCH_SOURCES ${JIT_ARCH_SOURCES})
+
+# Also convert the per-architecture sources to absolute paths, if the subdirs want to use them.
+
+convert_to_absolute_path(JIT_AMD64_SOURCES ${JIT_AMD64_SOURCES})
+convert_to_absolute_path(JIT_ARM_SOURCES ${JIT_ARM_SOURCES})
+convert_to_absolute_path(JIT_I386_SOURCES ${JIT_I386_SOURCES})
+convert_to_absolute_path(JIT_ARM64_SOURCES ${JIT_ARM64_SOURCES})
if(WIN32)
add_precompiled_header(jitpch.h ../jitpch.cpp SOURCES)
@@ -200,17 +212,33 @@ endif()
add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE})
-add_subdirectory(dll)
-add_subdirectory(crossgen)
+if (FEATURE_MERGE_JIT_AND_ENGINE)
+ # Despite the directory being named "dll", it creates a static library "clrjit_static" to link into the VM.
+ add_subdirectory(dll)
+ add_subdirectory(crossgen)
+endif (FEATURE_MERGE_JIT_AND_ENGINE)
+
add_subdirectory(standalone)
if (CLR_CMAKE_PLATFORM_ARCH_ARM)
add_subdirectory(protojit)
endif (CLR_CMAKE_PLATFORM_ARCH_ARM)
+if (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_AMD64)
+ # On x86, build RyuJIT/ARM32 cross-compiling altjit.
+ # On amd64, build RyuJIT/ARM64 cross-compiling altjit.
+ add_subdirectory(protononjit)
+endif ()
+
+if ((CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_AMD64) AND WIN32)
+ # On Windows, build altjit that targets the Linux ABI:
+ # On x86, build Linux/x86 altjit. This enables UNIX_X86_ABI.
+ # On amd64, build Linux/AMD64 altjit. This enables UNIX_AMD64_ABI and FEATURE_UNIX_AMD64_STRUCT_PASSING.
+ add_subdirectory(linuxnonjit)
+endif ()
+
if (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
- add_subdirectory(legacyjit)
- if (NOT CLR_BUILD_JIT32)
- add_subdirectory(compatjit)
- endif ()
+ # On Windows x86, build altjit generating Windows/ARM32 code using LEGACY_BACKEND.
+ # (Note: we could also create linuxlegacynonjit for generating Linux/ARM32 code using LEGACY_BACKEND, if needed.)
+ add_subdirectory(legacynonjit)
endif (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj
index eb00cc1d64..12ea52fb20 100644
--- a/src/jit/DIRS.proj
+++ b/src/jit/DIRS.proj
@@ -36,13 +36,9 @@
<ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="protojit\protojit.nativeproj" />
<ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="protojit\protojit.nativeproj" />
- <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="ctp\ctpjit.nativeproj" />
<ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="arm64altjit\arm64altjit.nativeproj" />
<ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protojit\protojit.nativeproj" />
<ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protononjit\protononjit.nativeproj" />
-
- <!-- We could build skipjit for all architectures, but we only need it for x86 currently -->
- <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="skipjit\skipjit.nativeproj" />
</ItemGroup>
<!--Import the targets-->
diff --git a/src/jit/ICorJitInfo_API_wrapper.hpp b/src/jit/ICorJitInfo_API_wrapper.hpp
index 4272b2755c..a3ad21165b 100644
--- a/src/jit/ICorJitInfo_API_wrapper.hpp
+++ b/src/jit/ICorJitInfo_API_wrapper.hpp
@@ -129,8 +129,6 @@ void WrapICorJitInfo::getMethodVTableOffset(
API_LEAVE(getMethodVTableOffset);
}
-#if COR_JIT_EE_VERSION > 460
-
CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(
CORINFO_METHOD_HANDLE method,
bool* pMustExpand /* OUT */)
@@ -141,18 +139,6 @@ CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(
return temp;
}
-#else
-
-CorInfoIntrinsics WrapICorJitInfo::getIntrinsicID(CORINFO_METHOD_HANDLE method)
-{
- API_ENTER(getIntrinsicID);
- CorInfoIntrinsics temp = wrapHnd->getIntrinsicID(method);
- API_LEAVE(getIntrinsicID);
- return temp;
-}
-
-#endif
-
bool WrapICorJitInfo::isInSIMDModule(CORINFO_CLASS_HANDLE classHnd)
{
API_ENTER(isInSIMDModule);
@@ -281,8 +267,6 @@ void WrapICorJitInfo::resolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResol
API_LEAVE(resolveToken);
}
-#if COR_JIT_EE_VERSION > 460
-
bool WrapICorJitInfo::tryResolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pResolvedToken)
{
API_ENTER(tryResolveToken);
@@ -291,8 +275,6 @@ bool WrapICorJitInfo::tryResolveToken(/* IN, OUT */ CORINFO_RESOLVED_TOKEN * pRe
return success;
}
-#endif
-
void WrapICorJitInfo::findSig(
CORINFO_MODULE_HANDLE module,
unsigned sigTOK,
@@ -617,8 +599,6 @@ CorInfoHelpFunc WrapICorJitInfo::getUnBoxHelper(
return temp;
}
-#if COR_JIT_EE_VERSION > 460
-
bool WrapICorJitInfo::getReadyToRunHelper(
CORINFO_RESOLVED_TOKEN * pResolvedToken,
CORINFO_LOOKUP_KIND * pGenericLookupKind,
@@ -634,27 +614,13 @@ bool WrapICorJitInfo::getReadyToRunHelper(
void WrapICorJitInfo::getReadyToRunDelegateCtorHelper(
CORINFO_RESOLVED_TOKEN * pTargetMethod,
CORINFO_CLASS_HANDLE delegateType,
- CORINFO_CONST_LOOKUP * pLookup)
+ CORINFO_LOOKUP * pLookup)
{
API_ENTER(getReadyToRunDelegateCtorHelper);
wrapHnd->getReadyToRunDelegateCtorHelper(pTargetMethod, delegateType, pLookup);
API_LEAVE(getReadyToRunDelegateCtorHelper);
}
-#else
-
-void WrapICorJitInfo::getReadyToRunHelper(
- CORINFO_RESOLVED_TOKEN * pResolvedToken,
- CorInfoHelpFunc id,
- CORINFO_CONST_LOOKUP * pLookup)
-{
- API_ENTER(getReadyToRunHelper);
- wrapHnd->getReadyToRunHelper(pResolvedToken, id, pLookup);
- API_LEAVE(getReadyToRunHelper);
-}
-
-#endif
-
const char* WrapICorJitInfo::getHelperName(
CorInfoHelpFunc funcNum)
{
@@ -1094,8 +1060,6 @@ size_t WrapICorJitInfo::findNameOfToken(
return result;
}
-#if COR_JIT_EE_VERSION > 460
-
bool WrapICorJitInfo::getSystemVAmd64PassStructInRegisterDescriptor(
/* IN */ CORINFO_CLASS_HANDLE structHnd,
/* OUT */ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
@@ -1106,8 +1070,6 @@ bool WrapICorJitInfo::getSystemVAmd64PassStructInRegisterDescriptor(
return result;
}
-#endif
-
DWORD WrapICorJitInfo::getThreadTLSIndex(
void **ppIndirection)
{
@@ -1271,8 +1233,6 @@ void* WrapICorJitInfo::getAddressOfPInvokeFixup(
return temp;
}
-#if COR_JIT_EE_VERSION > 460
-
void WrapICorJitInfo::getAddressOfPInvokeTarget(
CORINFO_METHOD_HANDLE method,
CORINFO_CONST_LOOKUP *pLookup)
@@ -1282,8 +1242,6 @@ void WrapICorJitInfo::getAddressOfPInvokeTarget(
API_LEAVE(getAddressOfPInvokeTarget);
}
-#endif
-
LPVOID WrapICorJitInfo::GetCookieForPInvokeCalliSig(
CORINFO_SIG_INFO* szMetaSig,
void ** ppIndirection)
@@ -1474,8 +1432,6 @@ void* WrapICorJitInfo::getTailCallCopyArgsThunk(
//
/*********************************************************************************/
-#if COR_JIT_EE_VERSION > 460
-
DWORD WrapICorJitInfo::getJitFlags(CORJIT_FLAGS *jitFlags, DWORD sizeInBytes)
{
API_ENTER(getJitFlags);
@@ -1489,8 +1445,6 @@ bool WrapICorJitInfo::runWithErrorTrap(void(*function)(void*), void *param)
return wrapHnd->runWithErrorTrap(function, param);
}
-#endif
-
IEEMemoryManager* WrapICorJitInfo::getMemoryManager()
{
API_ENTER(getMemoryManager);
diff --git a/src/jit/_typeinfo.h b/src/jit/_typeinfo.h
index 08273adc8d..b024912dda 100755
--- a/src/jit/_typeinfo.h
+++ b/src/jit/_typeinfo.h
@@ -27,8 +27,7 @@ enum ti_types
#define DEF_TI(ti, nm) ti,
#include "titypes.h"
#undef DEF_TI
- TI_ONLY_ENUM = TI_METHOD, // Enum values above this are completely described by the enumeration
- TI_COUNT
+ TI_ONLY_ENUM = TI_METHOD, // Enum values with greater value are completely described by the enumeration.
};
#if defined(_TARGET_64BIT_)
@@ -190,8 +189,6 @@ inline ti_types JITtype2tiType(CorInfoType type)
*
*/
-// TI_COUNT is less than or equal to TI_FLAG_DATA_MASK
-
#define TI_FLAG_DATA_BITS 6
#define TI_FLAG_DATA_MASK ((1 << TI_FLAG_DATA_BITS) - 1)
@@ -225,6 +222,9 @@ inline ti_types JITtype2tiType(CorInfoType type)
// since conversions between them are not verifiable.
#define TI_FLAG_NATIVE_INT 0x00000200
+// This item contains resolved token. It is used for ctor delegate optimization.
+#define TI_FLAG_TOKEN 0x00000400
+
// This item contains the 'this' pointer (used for tracking)
#define TI_FLAG_THIS_PTR 0x00001000
@@ -287,12 +287,13 @@ private:
union {
struct
{
- ti_types type : 6;
+ ti_types type : TI_FLAG_DATA_BITS;
unsigned uninitobj : 1; // used
unsigned byref : 1; // used
unsigned byref_readonly : 1; // used
unsigned nativeInt : 1; // used
- unsigned : 2; // unused
+ unsigned token : 1; // used
+ unsigned : 1; // unused
unsigned thisPtr : 1; // used
unsigned thisPermHome : 1; // used
unsigned generic_type_var : 1; // used
@@ -303,8 +304,10 @@ private:
union {
CORINFO_CLASS_HANDLE m_cls;
- // Valid only for type TI_METHOD
+ // Valid only for type TI_METHOD without IsToken
CORINFO_METHOD_HANDLE m_method;
+ // Valid only for TI_TOKEN with IsToken
+ CORINFO_RESOLVED_TOKEN* m_token;
};
template <typename T>
@@ -368,6 +371,16 @@ public:
m_method = method;
}
+ typeInfo(CORINFO_RESOLVED_TOKEN* token)
+ {
+ assert(token != nullptr);
+ assert(token->hMethod != nullptr);
+ assert(!isInvalidHandle(token->hMethod));
+ m_flags = TI_METHOD;
+ SetIsToken();
+ m_token = token;
+ }
+
#ifdef DEBUG
#if VERBOSE_VERIFY
void Dump() const;
@@ -447,6 +460,12 @@ public:
// Operations
/////////////////////////////////////////////////////////////////////////
+ void SetIsToken()
+ {
+ m_flags |= TI_FLAG_TOKEN;
+ assert(m_bits.token);
+ }
+
void SetIsThisPtr()
{
m_flags |= TI_FLAG_THIS_PTR;
@@ -556,14 +575,17 @@ public:
CORINFO_METHOD_HANDLE GetMethod() const
{
assert(GetType() == TI_METHOD);
+ if (IsToken())
+ {
+ return m_token->hMethod;
+ }
return m_method;
}
- // If FEATURE_CORECLR is enabled, GetMethod can be called
- // before the pointer type is known to be a method pointer type.
- CORINFO_METHOD_HANDLE GetMethod2() const
+ CORINFO_RESOLVED_TOKEN* GetToken() const
{
- return m_method;
+ assert(IsToken());
+ return m_token;
}
// Get this item's type
@@ -626,7 +648,7 @@ public:
// Returns whether this is a method desc
BOOL IsMethod() const
{
- return (GetType() == TI_METHOD);
+ return GetType() == TI_METHOD;
}
BOOL IsStruct() const
@@ -730,6 +752,11 @@ public:
return (m_flags & TI_FLAG_UNINIT_OBJREF);
}
+ BOOL IsToken() const
+ {
+ return IsMethod() && ((m_flags & TI_FLAG_TOKEN) != 0);
+ }
+
private:
// used to make functions that return typeinfo efficient.
typeInfo(DWORD flags, CORINFO_CLASS_HANDLE cls)
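Note on the _typeinfo.h change above: the new TI_FLAG_TOKEN bit turns the existing handle union into a flag-discriminated union, so one slot can hold either a CORINFO_METHOD_HANDLE or a CORINFO_RESOLVED_TOKEN* for the delegate-ctor optimization. A minimal standalone sketch of that pattern, with the JIT-EE interface types stubbed out (only the flag value mirrors the real code; all other names are placeholders):

    constexpr unsigned FLAG_TOKEN = 0x400;        // mirrors TI_FLAG_TOKEN

    struct ResolvedToken { void* hMethod; };      // stand-in for CORINFO_RESOLVED_TOKEN
    using MethodHandle = void*;                   // stand-in for CORINFO_METHOD_HANDLE

    struct MethodOrToken
    {
        unsigned flags = 0;                       // plays the role of typeInfo::m_flags
        union {
            MethodHandle   method;                // valid when FLAG_TOKEN is clear
            ResolvedToken* token;                 // valid when FLAG_TOKEN is set
        };

        MethodHandle GetMethod() const
        {
            // Mirrors typeInfo::GetMethod() after the change: if a resolved token is
            // stored, the method handle is recovered through it.
            return (flags & FLAG_TOKEN) ? token->hMethod : method;
        }
    };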
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index cb0832fe47..767d63a0df 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -511,7 +511,7 @@ ASSERT_TP& Compiler::GetAssertionDep(unsigned lclNum)
ExpandArray<ASSERT_TP>& dep = *optAssertionDep;
if (dep[lclNum] == nullptr)
{
- dep[lclNum] = optNewEmptyAssertSet();
+ dep[lclNum] = BitVecOps::MakeEmpty(apTraits);
}
return dep[lclNum];
}
@@ -524,10 +524,7 @@ ASSERT_TP& Compiler::GetAssertionDep(unsigned lclNum)
void Compiler::optAssertionTraitsInit(AssertionIndex assertionCount)
{
apTraits = new (getAllocator()) BitVecTraits(assertionCount, this);
- apFull = BitVecOps::UninitVal();
- apEmpty = BitVecOps::UninitVal();
- BitVecOps::AssignNoCopy(apTraits, apFull, BitVecOps::MakeFull(apTraits));
- BitVecOps::AssignNoCopy(apTraits, apEmpty, BitVecOps::MakeEmpty(apTraits));
+ apFull = BitVecOps::MakeFull(apTraits);
}
/*****************************************************************************
@@ -792,12 +789,7 @@ void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex asse
if (assertionIndex > 0)
{
printf(" index=#%02u, mask=", assertionIndex);
-
- // This is an hack to reuse a known empty set in order to display
- // a single bit mask.
- BitVecOps::AddElemD(apTraits, apEmpty, assertionIndex - 1);
- printf("%s", BitVecOps::ToString(apTraits, apEmpty));
- BitVecOps::RemoveElemD(apTraits, apEmpty, assertionIndex - 1);
+ printf("%s", BitVecOps::ToString(apTraits, BitVecOps::MakeSingleton(apTraits, assertionIndex - 1)));
}
printf("\n");
}
@@ -828,7 +820,7 @@ Compiler::AssertionDsc* Compiler::optGetAssertion(AssertionIndex assertIndex)
* if they don't care about it. Refer overloaded method optCreateAssertion.
*
*/
-Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind)
+AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind)
{
AssertionDsc assertionDsc;
return optCreateAssertion(op1, op2, assertionKind, &assertionDsc);
@@ -850,10 +842,10 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr
* NO_ASSERTION_INDEX and we could not create the assertion.
*
*/
-Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
- GenTreePtr op2,
- optAssertionKind assertionKind,
- AssertionDsc* assertion)
+AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
+ GenTreePtr op2,
+ optAssertionKind assertionKind,
+ AssertionDsc* assertion)
{
memset(assertion, 0, sizeof(AssertionDsc));
//
@@ -955,12 +947,14 @@ Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
while (vnStore->GetVNFunc(vn, &funcAttr) && (funcAttr.m_func == (VNFunc)GT_ADD) &&
(vnStore->TypeOfVN(vn) == TYP_BYREF))
{
- if (vnStore->IsVNConstant(funcAttr.m_args[1]))
+ if (vnStore->IsVNConstant(funcAttr.m_args[1]) &&
+ varTypeIsIntegral(vnStore->TypeOfVN(funcAttr.m_args[1])))
{
offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[1]);
vn = funcAttr.m_args[0];
}
- else if (vnStore->IsVNConstant(funcAttr.m_args[0]))
+ else if (vnStore->IsVNConstant(funcAttr.m_args[0]) &&
+ varTypeIsIntegral(vnStore->TypeOfVN(funcAttr.m_args[0])))
{
offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[0]);
vn = funcAttr.m_args[1];
@@ -1491,13 +1485,15 @@ void Compiler::optPrintVnAssertionMapping()
*/
void Compiler::optAddVnAssertionMapping(ValueNum vn, AssertionIndex index)
{
- ASSERT_TP cur;
- if (!optValueNumToAsserts->Lookup(vn, &cur))
+ ASSERT_TP* cur = optValueNumToAsserts->LookupPointer(vn);
+ if (cur == nullptr)
{
- cur = optNewEmptyAssertSet();
- optValueNumToAsserts->Set(vn, cur);
+ optValueNumToAsserts->Set(vn, BitVecOps::MakeSingleton(apTraits, index - 1));
+ }
+ else
+ {
+ BitVecOps::AddElemD(apTraits, *cur, index - 1);
}
- BitVecOps::AddElemD(apTraits, cur, index - 1);
}
/*****************************************************************************
@@ -1538,7 +1534,7 @@ bool Compiler::optAssertionVnInvolvesNan(AssertionDsc* assertion)
* we use to refer to this element.
* If we need to add to the table and the table is full return the value zero
*/
-Compiler::AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion)
+AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion)
{
noway_assert(newAssertion->assertionKind != OAK_INVALID);
@@ -1745,9 +1741,9 @@ void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, Ge
* for the operands.
*/
-Compiler::AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr op1,
- GenTreePtr op2,
- Compiler::optAssertionKind assertionKind)
+AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr op1,
+ GenTreePtr op2,
+ Compiler::optAssertionKind assertionKind)
{
AssertionDsc candidateAssertion;
AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind, &candidateAssertion);
@@ -1760,7 +1756,7 @@ Compiler::AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr
return assertionIndex;
}
-Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree)
+AssertionInfo Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree)
{
GenTreePtr relop = tree->gtGetOp1();
if ((relop->OperKind() & GTK_RELOP) == 0)
@@ -1771,6 +1767,8 @@ Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree
GenTreePtr op2 = relop->gtGetOp2();
ValueNum vn = op1->gtVNPair.GetConservative();
+
+ ValueNumStore::ArrLenUnsignedBoundInfo arrLenUnsignedBnd;
// Cases where op1 holds the condition with array arithmetic and op2 is 0.
// Loop condition like: "i < a.len +/-k == 0"
// Assertion: "i < a.len +/- k == 0"
@@ -1826,6 +1824,32 @@ Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree
optCreateComplementaryAssertion(index, nullptr, nullptr);
return index;
}
+ // Loop condition like "(uint)i < (uint)a.len" or equivalent
+ // Assertion: "no throw" since this condition guarantees that i is both >= 0 and < a.len (on the appropriate edge)
+ else if (vnStore->IsVNArrLenUnsignedBound(relop->gtVNPair.GetConservative(), &arrLenUnsignedBnd))
+ {
+ assert(arrLenUnsignedBnd.vnIdx != ValueNumStore::NoVN);
+ assert((arrLenUnsignedBnd.cmpOper == VNF_LT_UN) || (arrLenUnsignedBnd.cmpOper == VNF_GE_UN));
+ assert(vnStore->IsVNArrLen(arrLenUnsignedBnd.vnLen));
+
+ AssertionDsc dsc;
+ dsc.assertionKind = OAK_NO_THROW;
+ dsc.op1.kind = O1K_ARR_BND;
+ dsc.op1.vn = relop->gtVNPair.GetConservative();
+ dsc.op1.bnd.vnIdx = arrLenUnsignedBnd.vnIdx;
+ dsc.op1.bnd.vnLen = arrLenUnsignedBnd.vnLen;
+ dsc.op2.kind = O2K_INVALID;
+ dsc.op2.vn = ValueNumStore::NoVN;
+
+ AssertionIndex index = optAddAssertion(&dsc);
+ if (arrLenUnsignedBnd.cmpOper == VNF_GE_UN)
+ {
+ // By default JTRUE generated assertions hold on the "jump" edge. We have i >= a.len but we're really
+ // after i < a.len so we need to change the assertion edge to "next".
+ return AssertionInfo::ForNextEdge(index);
+ }
+ return index;
+ }
// Cases where op1 holds the condition bound check and op2 is 0.
// Loop condition like: "i < 100 == 0"
// Assertion: "i < 100 == false"
@@ -1870,7 +1894,7 @@ Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree
*
* Compute assertions for the JTrue node.
*/
-Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree)
+AssertionInfo Compiler::optAssertionGenJtrue(GenTreePtr tree)
{
// Only create assertions for JTRUE when we are in the global phase
if (optLocalAssertionProp)
@@ -1889,10 +1913,10 @@ Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree)
GenTreePtr op1 = relop->gtOp.gtOp1;
GenTreePtr op2 = relop->gtOp.gtOp2;
- AssertionIndex index = optCreateJTrueBoundsAssertion(tree);
- if (index != NO_ASSERTION_INDEX)
+ AssertionInfo info = optCreateJTrueBoundsAssertion(tree);
+ if (info.HasAssertion())
{
- return index;
+ return info;
}
// Find assertion kind.
@@ -1974,7 +1998,7 @@ Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree)
* from all of the constituent phi operands.
*
*/
-Compiler::AssertionIndex Compiler::optAssertionGenPhiDefn(GenTreePtr tree)
+AssertionIndex Compiler::optAssertionGenPhiDefn(GenTreePtr tree)
{
if (!tree->IsPhiDefn())
{
@@ -2023,19 +2047,19 @@ void Compiler::optAssertionGen(GenTreePtr tree)
// For most of the assertions that we create below
// the assertion is true after the tree is processed
- bool assertionProven = true;
- AssertionIndex assertionIndex = NO_ASSERTION_INDEX;
+ bool assertionProven = true;
+ AssertionInfo assertionInfo;
switch (tree->gtOper)
{
case GT_ASG:
// VN takes care of non local assertions for assignments and data flow.
if (optLocalAssertionProp)
{
- assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL);
+ assertionInfo = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL);
}
else
{
- assertionIndex = optAssertionGenPhiDefn(tree);
+ assertionInfo = optAssertionGenPhiDefn(tree);
}
break;
@@ -2045,24 +2069,24 @@ void Compiler::optAssertionGen(GenTreePtr tree)
case GT_IND:
case GT_NULLCHECK:
// All indirections create non-null assertions
- assertionIndex = optCreateAssertion(tree->AsIndir()->Addr(), nullptr, OAK_NOT_EQUAL);
+ assertionInfo = optCreateAssertion(tree->AsIndir()->Addr(), nullptr, OAK_NOT_EQUAL);
break;
case GT_ARR_LENGTH:
// An array length is an indirection (but doesn't derive from GenTreeIndir).
- assertionIndex = optCreateAssertion(tree->AsArrLen()->ArrRef(), nullptr, OAK_NOT_EQUAL);
+ assertionInfo = optCreateAssertion(tree->AsArrLen()->ArrRef(), nullptr, OAK_NOT_EQUAL);
break;
case GT_ARR_BOUNDS_CHECK:
if (!optLocalAssertionProp)
{
- assertionIndex = optCreateAssertion(tree, nullptr, OAK_NO_THROW);
+ assertionInfo = optCreateAssertion(tree, nullptr, OAK_NO_THROW);
}
break;
case GT_ARR_ELEM:
// An array element reference can create a non-null assertion
- assertionIndex = optCreateAssertion(tree->gtArrElem.gtArrObj, nullptr, OAK_NOT_EQUAL);
+ assertionInfo = optCreateAssertion(tree->gtArrElem.gtArrObj, nullptr, OAK_NOT_EQUAL);
break;
case GT_CALL:
@@ -2071,7 +2095,7 @@ void Compiler::optAssertionGen(GenTreePtr tree)
if ((tree->gtFlags & GTF_CALL_NULLCHECK) || ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT))
{
// Retrieve the 'this' arg
- GenTreePtr thisArg = gtGetThisArg(tree);
+ GenTreePtr thisArg = gtGetThisArg(tree->AsCall());
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
if (thisArg == nullptr)
{
@@ -2082,7 +2106,7 @@ void Compiler::optAssertionGen(GenTreePtr tree)
}
#endif // _TARGET_X86_ || _TARGET_AMD64_ || _TARGET_ARM_
noway_assert(thisArg != nullptr);
- assertionIndex = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL);
+ assertionInfo = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL);
}
break;
@@ -2093,13 +2117,13 @@ void Compiler::optAssertionGen(GenTreePtr tree)
// This represents an assertion that we would like to prove to be true. It is not actually a true
// assertion.
// If we can prove this assertion true then we can eliminate this cast.
- assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree, OAK_SUBRANGE);
+ assertionInfo = optCreateAssertion(tree->gtOp.gtOp1, tree, OAK_SUBRANGE);
assertionProven = false;
}
break;
case GT_JTRUE:
- assertionIndex = optAssertionGenJtrue(tree);
+ assertionInfo = optAssertionGenJtrue(tree);
break;
default:
@@ -2108,9 +2132,9 @@ void Compiler::optAssertionGen(GenTreePtr tree)
}
// For global assertion prop we must store the assertion number in the tree node
- if ((assertionIndex != NO_ASSERTION_INDEX) && assertionProven && !optLocalAssertionProp)
+ if (assertionInfo.HasAssertion() && assertionProven && !optLocalAssertionProp)
{
- tree->SetAssertion(assertionIndex);
+ tree->SetAssertionInfo(assertionInfo);
}
}
@@ -2134,7 +2158,7 @@ void Compiler::optMapComplementary(AssertionIndex assertionIndex, AssertionIndex
* Given an assertion index, return the assertion index of the complementary
* assertion or 0 if one does not exist.
*/
-Compiler::AssertionIndex Compiler::optFindComplementary(AssertionIndex assertIndex)
+AssertionIndex Compiler::optFindComplementary(AssertionIndex assertIndex)
{
if (assertIndex == NO_ASSERTION_INDEX)
{
@@ -2177,9 +2201,7 @@ Compiler::AssertionIndex Compiler::optFindComplementary(AssertionIndex assertInd
* if one such assertion could not be found in "assertions."
*/
-Compiler::AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree,
- var_types toType,
- ASSERT_VALARG_TP assertions)
+AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree, var_types toType, ASSERT_VALARG_TP assertions)
{
if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
{
@@ -2245,9 +2267,7 @@ Compiler::AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree,
* could not be found, then it returns NO_ASSERTION_INDEX.
*
*/
-Compiler::AssertionIndex Compiler::optAssertionIsSubtype(GenTreePtr tree,
- GenTreePtr methodTableArg,
- ASSERT_VALARG_TP assertions)
+AssertionIndex Compiler::optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, ASSERT_VALARG_TP assertions)
{
if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
{
@@ -2418,11 +2438,9 @@ GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt,
#ifdef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
-#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
if (!opts.compReloc)
-#endif
{
newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
@@ -2491,11 +2509,9 @@ GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt,
#ifndef _TARGET_64BIT_
if (vnStore->IsVNHandle(vnCns))
{
-#ifdef RELOC_SUPPORT
// Don't perform constant folding that involves a handle that needs
// to be recorded as a relocation with the VM.
if (!opts.compReloc)
-#endif
{
newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
@@ -2905,7 +2921,7 @@ GenTreePtr Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const
* op1Kind and lclNum, op2Kind and the constant value and is either equal or
* not equal assertion.
*/
-Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(
+AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(
optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions)
{
noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE));
@@ -2947,9 +2963,9 @@ Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(
* "op1" == "op2" or "op1" != "op2." Does a value number based comparison.
*
*/
-Compiler::AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions,
- GenTreePtr op1,
- GenTreePtr op2)
+AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions,
+ GenTreePtr op1,
+ GenTreePtr op2)
{
if (BitVecOps::IsEmpty(apTraits, assertions))
{
@@ -3503,7 +3519,7 @@ bool Compiler::optAssertionIsNonNull(GenTreePtr op,
* from the set of "assertions."
*
*/
-Compiler::AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions)
+AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions)
{
// If local assertion prop use lcl comparison, else use VN comparison.
if (!optLocalAssertionProp)
@@ -3562,16 +3578,13 @@ Compiler::AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op,
* Returns the modified tree, or nullptr if no assertion prop took place.
*
*/
-GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions,
- const GenTreePtr tree,
- const GenTreePtr stmt)
+GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, const GenTreePtr stmt)
{
- assert(tree->gtOper == GT_CALL);
- if ((tree->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
{
return nullptr;
}
- GenTreePtr op1 = gtGetThisArg(tree);
+ GenTreePtr op1 = gtGetThisArg(call);
noway_assert(op1 != nullptr);
if (op1->gtOper != GT_LCL_VAR)
{
@@ -3589,13 +3602,13 @@ GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions,
{
(vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum)
: printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
- gtDispTree(tree, nullptr, nullptr, true);
+ gtDispTree(call, nullptr, nullptr, true);
}
#endif
- tree->gtFlags &= ~GTF_CALL_NULLCHECK;
- tree->gtFlags &= ~GTF_EXCEPT;
- noway_assert(tree->gtFlags & GTF_SIDE_EFFECT);
- return tree;
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ call->gtFlags &= ~GTF_EXCEPT;
+ noway_assert(call->gtFlags & GTF_SIDE_EFFECT);
+ return call;
}
return nullptr;
}
@@ -3612,33 +3625,31 @@ GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions,
*
*/
-GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, const GenTreePtr stmt)
{
- assert(tree->gtOper == GT_CALL);
-
- if (optNonNullAssertionProp_Call(assertions, tree, stmt))
+ if (optNonNullAssertionProp_Call(assertions, call, stmt))
{
- return optAssertionProp_Update(tree, tree, stmt);
+ return optAssertionProp_Update(call, call, stmt);
}
- else if (!optLocalAssertionProp && (tree->gtCall.gtCallType == CT_HELPER))
+ else if (!optLocalAssertionProp && (call->gtCallType == CT_HELPER))
{
- if (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTINTERFACE) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTARRAY) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTANY) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL))
+ if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTINTERFACE) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTARRAY) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTANY) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL))
{
- GenTreePtr arg1 = gtArgEntryByArgNum(tree->AsCall(), 1)->node;
+ GenTreePtr arg1 = gtArgEntryByArgNum(call, 1)->node;
if (arg1->gtOper != GT_LCL_VAR)
{
return nullptr;
}
- GenTreePtr arg2 = gtArgEntryByArgNum(tree->AsCall(), 0)->node;
+ GenTreePtr arg2 = gtArgEntryByArgNum(call, 0)->node;
unsigned index = optAssertionIsSubtype(arg1, arg2, assertions);
if (index != NO_ASSERTION_INDEX)
@@ -3647,18 +3658,18 @@ GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, const Ge
if (verbose)
{
printf("\nDid VN based subtype prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
- gtDispTree(tree, nullptr, nullptr, true);
+ gtDispTree(call, nullptr, nullptr, true);
}
#endif
GenTreePtr list = nullptr;
- gtExtractSideEffList(tree, &list, GTF_SIDE_EFFECT, true);
+ gtExtractSideEffList(call, &list, GTF_SIDE_EFFECT, true);
if (list != nullptr)
{
- arg1 = gtNewOperNode(GT_COMMA, tree->TypeGet(), list, arg1);
+ arg1 = gtNewOperNode(GT_COMMA, call->TypeGet(), list, arg1);
fgSetTreeSeq(arg1);
}
- return optAssertionProp_Update(arg1, tree, stmt);
+ return optAssertionProp_Update(arg1, call, stmt);
}
}
}
@@ -3889,7 +3900,7 @@ GenTreePtr Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, const GenTree
return optAssertionProp_Cast(assertions, tree, stmt);
case GT_CALL:
- return optAssertionProp_Call(assertions, tree, stmt);
+ return optAssertionProp_Call(assertions, tree->AsCall(), stmt);
case GT_EQ:
case GT_NE:
@@ -4378,15 +4389,8 @@ public:
JITDUMP("AssertionPropCallback::EndMerge : BB%02d in -> %s\n\n", block->bbNum,
BitVecOps::ToString(apTraits, block->bbAssertionIn));
- // PERF: eliminate this tmp by passing in a OperationTree (AST) to the bitset,
- // so the expr tree is operated on a single bit level. See "expression templates."
- ASSERT_TP tmp = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
- BitVecOps::UnionD(apTraits, tmp, block->bbAssertionGen);
- BitVecOps::IntersectionD(apTraits, block->bbAssertionOut, tmp);
-
- BitVecOps::Assign(apTraits, tmp, block->bbAssertionIn);
- BitVecOps::UnionD(apTraits, tmp, mJumpDestGen[block->bbNum]);
- BitVecOps::IntersectionD(apTraits, mJumpDestOut[block->bbNum], tmp);
+ BitVecOps::DataFlowD(apTraits, block->bbAssertionOut, block->bbAssertionGen, block->bbAssertionIn);
+ BitVecOps::DataFlowD(apTraits, mJumpDestOut[block->bbNum], mJumpDestGen[block->bbNum], block->bbAssertionIn);
bool changed = (!BitVecOps::Equal(apTraits, preMergeOut, block->bbAssertionOut) ||
!BitVecOps::Equal(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]));
@@ -4411,16 +4415,6 @@ public:
}
};
-ASSERT_VALRET_TP Compiler::optNewFullAssertSet()
-{
- return BitVecOps::MakeCopy(apTraits, apFull);
-}
-
-ASSERT_VALRET_TP Compiler::optNewEmptyAssertSet()
-{
- return BitVecOps::MakeCopy(apTraits, apEmpty);
-}
-
/*****************************************************************************
*
* Compute the assertions generated by each block.
@@ -4429,15 +4423,10 @@ ASSERT_TP* Compiler::optComputeAssertionGen()
{
ASSERT_TP* jumpDestGen = fgAllocateTypeForEachBlk<ASSERT_TP>();
- ASSERT_TP valueGen = BitVecOps::MakeEmpty(apTraits);
- ASSERT_TP jumpDestValueGen = BitVecOps::MakeEmpty(apTraits);
-
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- jumpDestGen[block->bbNum] = BitVecOps::MakeEmpty(apTraits);
-
- BitVecOps::ClearD(apTraits, valueGen);
- BitVecOps::ClearD(apTraits, jumpDestValueGen);
+ ASSERT_TP valueGen = BitVecOps::MakeEmpty(apTraits);
+ GenTree* jtrue = nullptr;
// Walk the statement trees in this basic block.
for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
@@ -4446,47 +4435,77 @@ ASSERT_TP* Compiler::optComputeAssertionGen()
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
{
- // Store whatever we have accumulated into jumpDest edge's valueGen.
if (tree->gtOper == GT_JTRUE)
{
- BitVecOps::Assign(apTraits, jumpDestValueGen, valueGen);
+ // A GT_JTRUE is always the last node in a tree, so we can break here
+ assert((tree->gtNext == nullptr) && (stmt->gtNext == nullptr));
+ jtrue = tree;
+ break;
}
- if (!tree->HasAssertion())
+
+ if (tree->GeneratesAssertion())
{
- continue;
+ AssertionInfo info = tree->GetAssertionInfo();
+ optImpliedAssertions(info.GetAssertionIndex(), valueGen);
+ BitVecOps::AddElemD(apTraits, valueGen, info.GetAssertionIndex() - 1);
}
+ }
+ }
+
+ if (jtrue != nullptr)
+ {
+ // Copy whatever we have accumulated into jumpDest edge's valueGen.
+ ASSERT_TP jumpDestValueGen = BitVecOps::MakeCopy(apTraits, valueGen);
+
+ if (jtrue->GeneratesAssertion())
+ {
+ AssertionInfo info = jtrue->GetAssertionInfo();
+ AssertionIndex valueAssertionIndex;
+ AssertionIndex jumpDestAssertionIndex;
- // For regular trees, just update valueGen. For GT_JTRUE, for false part,
- // update valueGen and true part update jumpDestValueGen.
- AssertionIndex assertionIndex[2] = {(AssertionIndex)tree->GetAssertion(),
- (tree->OperGet() == GT_JTRUE)
- ? optFindComplementary((AssertionIndex)tree->GetAssertion())
- : 0};
+ if (info.IsNextEdgeAssertion())
+ {
+ valueAssertionIndex = info.GetAssertionIndex();
+ jumpDestAssertionIndex = optFindComplementary(info.GetAssertionIndex());
+ }
+ else // is jump edge assertion
+ {
+ valueAssertionIndex = optFindComplementary(info.GetAssertionIndex());
+ jumpDestAssertionIndex = info.GetAssertionIndex();
+ }
- for (unsigned i = 0; i < 2; ++i)
+ if (valueAssertionIndex != NO_ASSERTION_INDEX)
{
- if (assertionIndex[i] > 0)
- {
- // If GT_JTRUE, and true part use jumpDestValueGen.
- ASSERT_TP& gen = (i == 0 && tree->OperGet() == GT_JTRUE) ? jumpDestValueGen : valueGen;
- optImpliedAssertions(assertionIndex[i], gen);
- BitVecOps::AddElemD(apTraits, gen, assertionIndex[i] - 1);
- }
+ // Update valueGen if we have an assertion for the bbNext edge
+ optImpliedAssertions(valueAssertionIndex, valueGen);
+ BitVecOps::AddElemD(apTraits, valueGen, valueAssertionIndex - 1);
+ }
+
+ if (jumpDestAssertionIndex != NO_ASSERTION_INDEX)
+ {
+ // Update jumpDestValueGen if we have an assertion for the bbJumpDest edge
+ optImpliedAssertions(jumpDestAssertionIndex, jumpDestValueGen);
+ BitVecOps::AddElemD(apTraits, jumpDestValueGen, jumpDestAssertionIndex - 1);
}
}
+
+ jumpDestGen[block->bbNum] = jumpDestValueGen;
+ }
+ else
+ {
+ jumpDestGen[block->bbNum] = BitVecOps::MakeEmpty(apTraits);
}
- BitVecOps::Assign(apTraits, block->bbAssertionGen, valueGen);
- BitVecOps::Assign(apTraits, jumpDestGen[block->bbNum], jumpDestValueGen);
+ block->bbAssertionGen = valueGen;
#ifdef DEBUG
if (verbose)
{
- printf("\nBB%02u valueGen = %s", block->bbNum, BitVecOps::ToString(apTraits, valueGen));
+ printf("\nBB%02u valueGen = %s", block->bbNum, BitVecOps::ToString(apTraits, block->bbAssertionGen));
if (block->bbJumpKind == BBJ_COND)
{
printf(" => BB%02u valueGen = %s,", block->bbJumpDest->bbNum,
- BitVecOps::ToString(apTraits, jumpDestValueGen));
+ BitVecOps::ToString(apTraits, jumpDestGen[block->bbNum]));
}
}
#endif
@@ -4509,7 +4528,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
// apFull (i.e. all possible bits set), we need to set the bits only for valid
// assertions (note that at this point we are not creating any new assertions).
// Also note that assertion indices start from 1.
- ASSERT_TP apValidFull = optNewEmptyAssertSet();
+ ASSERT_TP apValidFull = BitVecOps::MakeEmpty(apTraits);
for (int i = 1; i <= optAssertionCount; i++)
{
BitVecOps::AddElemD(apTraits, apValidFull, i - 1);
@@ -4523,20 +4542,21 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
// edges.
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- block->bbAssertionIn = optNewEmptyAssertSet();
- if (!bbIsHandlerBeg(block))
+ if (bbIsHandlerBeg(block))
+ {
+ block->bbAssertionIn = BitVecOps::MakeEmpty(apTraits);
+ }
+ else
{
- BitVecOps::Assign(apTraits, block->bbAssertionIn, apValidFull);
+ block->bbAssertionIn = BitVecOps::MakeCopy(apTraits, apValidFull);
}
- block->bbAssertionGen = optNewEmptyAssertSet();
- block->bbAssertionOut = optNewEmptyAssertSet();
- BitVecOps::Assign(apTraits, block->bbAssertionOut, apValidFull);
- jumpDestOut[block->bbNum] = optNewEmptyAssertSet();
- BitVecOps::Assign(apTraits, jumpDestOut[block->bbNum], apValidFull);
+ block->bbAssertionGen = BitVecOps::MakeEmpty(apTraits);
+ block->bbAssertionOut = BitVecOps::MakeCopy(apTraits, apValidFull);
+ jumpDestOut[block->bbNum] = BitVecOps::MakeCopy(apTraits, apValidFull);
}
// Compute the data flow values for all tracked expressions
// IN and OUT never change for the initial basic block B1
- BitVecOps::Assign(apTraits, fgFirstBB->bbAssertionIn, apEmpty);
+ BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn);
return jumpDestOut;
}
@@ -4839,7 +4859,7 @@ void Compiler::optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTr
GenTreePtr newTree = nullptr;
if (tree->OperGet() == GT_CALL)
{
- newTree = optNonNullAssertionProp_Call(empty, tree, stmt);
+ newTree = optNonNullAssertionProp_Call(empty, tree->AsCall(), stmt);
}
else if (tree->OperIsIndir())
{
@@ -5032,10 +5052,12 @@ void Compiler::optAssertionPropMain()
}
#endif // DEBUG
+ ASSERT_TP assertions = BitVecOps::MakeEmpty(apTraits);
+
// Perform assertion propagation (and constant folding)
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- ASSERT_TP assertions = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
+ BitVecOps::Assign(apTraits, assertions, block->bbAssertionIn);
// TODO-Review: EH successor/predecessor iteration seems broken.
// SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe
@@ -5070,9 +5092,16 @@ void Compiler::optAssertionPropMain()
// and thus we must morph, set order, re-link
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
{
+ if (tree->OperIs(GT_JTRUE))
+ {
+ // A GT_JTRUE is always the last node in a tree, so we can break here
+ assert((tree->gtNext == nullptr) && (stmt->gtNext == nullptr));
+ break;
+ }
+
JITDUMP("Propagating %s assertions for BB%02d, stmt [%06d], tree [%06d], tree -> %d\n",
BitVecOps::ToString(apTraits, assertions), block->bbNum, dspTreeID(stmt), dspTreeID(tree),
- tree->GetAssertion());
+ tree->GetAssertionInfo().GetAssertionIndex());
GenTreePtr newTree = optAssertionProp(assertions, tree, stmt);
if (newTree)
@@ -5081,16 +5110,12 @@ void Compiler::optAssertionPropMain()
tree = newTree;
}
- // Is this an assignment to a local variable
- GenTreeLclVarCommon* lclVarTree = nullptr;
-
// If this tree makes an assertion - make it available.
- if (tree->HasAssertion())
+ if (tree->GeneratesAssertion())
{
- BitVecOps::AddElemD(apTraits, assertions, tree->GetAssertion() - 1);
-
- // Also include any implied assertions for the tree node.
- optImpliedAssertions((AssertionIndex)tree->GetAssertion(), assertions);
+ AssertionInfo info = tree->GetAssertionInfo();
+ optImpliedAssertions(info.GetAssertionIndex(), assertions);
+ BitVecOps::AddElemD(apTraits, assertions, info.GetAssertionIndex() - 1);
}
}
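The new IsVNArrLenUnsignedBound case above creates an OAK_NO_THROW assertion from a single unsigned compare of an index against an array length. A small standalone C++ sketch (not from the patch) of why that one compare covers both range checks for a signed index:

    #include <cassert>
    #include <cstdint>

    bool InBounds(int32_t i, int32_t len) // len is an array length, so len >= 0
    {
        // Equivalent to (i >= 0) && (i < len): a negative i wraps to a value
        // >= 0x80000000u, which can never be below a non-negative len.
        return static_cast<uint32_t>(i) < static_cast<uint32_t>(len);
    }

    int main()
    {
        assert(InBounds(3, 10));
        assert(!InBounds(-1, 10)); // negative index rejected by the single unsigned compare
        assert(!InBounds(10, 10)); // index equal to the length rejected
        return 0;
    }

When the condition is in the VNF_GE_UN form, the assertion holds on the fall-through edge instead of the jump edge, which is what AssertionInfo::ForNextEdge records in the patch.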
diff --git a/src/jit/bitset.cpp b/src/jit/bitset.cpp
index 90ef253199..785061f44c 100644
--- a/src/jit/bitset.cpp
+++ b/src/jit/bitset.cpp
@@ -98,9 +98,9 @@ void BitSetSupport::RunTests(Env env)
class TestBitSetTraits
{
public:
- static IAllocator* GetAllocator(IAllocator* alloc)
+ static void* Alloc(IAllocator* alloc, size_t byteSize)
{
- return alloc;
+ return alloc->Alloc(byteSize);
}
static unsigned GetSize(IAllocator* alloc)
{
diff --git a/src/jit/bitsetasshortlong.h b/src/jit/bitsetasshortlong.h
index ec437e189c..163cb366cb 100644
--- a/src/jit/bitsetasshortlong.h
+++ b/src/jit/bitsetasshortlong.h
@@ -38,6 +38,7 @@ private:
static BitSetShortLongRep MakeCopyLong(Env env, BitSetShortLongRep bs);
static bool IsEmptyLong(Env env, BitSetShortLongRep bs);
static unsigned CountLong(Env env, BitSetShortLongRep bs);
+ static bool IsEmptyUnionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
static void UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
@@ -51,6 +52,15 @@ private:
static bool IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
static bool IsEmptyIntersectionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
static void IntersectionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+ static void DataFlowDLong(Env env,
+ BitSetShortLongRep& out,
+ const BitSetShortLongRep gen,
+ const BitSetShortLongRep in);
+ static void LivenessDLong(Env env,
+ BitSetShortLongRep& in,
+ const BitSetShortLongRep def,
+ const BitSetShortLongRep use,
+ const BitSetShortLongRep out);
#ifdef DEBUG
static const char* ToStringLong(Env env, BitSetShortLongRep bs);
#endif
@@ -176,6 +186,18 @@ public:
}
}
+ static bool IsEmptyUnion(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ return (((size_t)bs1) | ((size_t)bs2)) == 0;
+ }
+ else
+ {
+ return IsEmptyUnionLong(env, bs1, bs2);
+ }
+ }
+
static void UnionD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
{
if (IsShort(env))
@@ -299,6 +321,34 @@ public:
}
}
+ static void DataFlowD(Env env, BitSetShortLongRep& out, const BitSetShortLongRep gen, const BitSetShortLongRep in)
+ {
+ if (IsShort(env))
+ {
+ (size_t&)out = (size_t)out & ((size_t)gen | (size_t)in);
+ }
+ else
+ {
+ DataFlowDLong(env, out, gen, in);
+ }
+ }
+
+ static void LivenessD(Env env,
+ BitSetShortLongRep& in,
+ const BitSetShortLongRep def,
+ const BitSetShortLongRep use,
+ const BitSetShortLongRep out)
+ {
+ if (IsShort(env))
+ {
+ (size_t&)in = (size_t)use | ((size_t)out & ~(size_t)def);
+ }
+ else
+ {
+ LivenessDLong(env, in, def, use, out);
+ }
+ }
+
static bool IsSubset(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
{
if (IsShort(env))
@@ -332,14 +382,13 @@ public:
if (IsShort(env))
{
assert(sizeof(BitSetShortLongRep) == sizeof(size_t));
- IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
- const int CharsForSizeT = sizeof(size_t) * 2;
- char* res = nullptr;
- const int ShortAllocSize = CharsForSizeT + 4;
- res = (char*)alloc->Alloc(ShortAllocSize);
- size_t bits = (size_t)bs;
- unsigned remaining = ShortAllocSize;
- char* ptr = res;
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ char* res = nullptr;
+ const int ShortAllocSize = CharsForSizeT + 4;
+ res = (char*)BitSetTraits::DebugAlloc(env, ShortAllocSize);
+ size_t bits = (size_t)bs;
+ unsigned remaining = ShortAllocSize;
+ char* ptr = res;
if (sizeof(size_t) == sizeof(int64_t))
{
sprintf_s(ptr, remaining, "%016llX", bits);
@@ -629,7 +678,7 @@ BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
assert(!IsShort(env));
unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
assert(len > 1); // Or else would not require an array.
- return (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ return (BitSetShortLongRep)(BitSetTraits::Alloc(env, len * sizeof(size_t)));
}
template <typename Env, typename BitSetTraits>
@@ -641,7 +690,7 @@ BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
assert(!IsShort(env));
unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
assert(len > 1); // Or else would not require an array.
- BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::Alloc(env, len * sizeof(size_t)));
for (unsigned i = 0; i < len; i++)
{
res[i] = 0;
@@ -658,7 +707,7 @@ BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
assert(!IsShort(env));
unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
assert(len > 1); // Or else would not require an array.
- BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::Alloc(env, len * sizeof(size_t)));
for (unsigned i = 0; i < len - 1; i++)
{
res[i] = size_t(-1);
@@ -722,6 +771,59 @@ template <typename Env, typename BitSetTraits>
bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsEmptyUnionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if ((bs1[i] | bs2[i]) != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::DataFlowDLong(Env env,
+ BitSetShortLongRep& out,
+ const BitSetShortLongRep gen,
+ const BitSetShortLongRep in)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ out[i] = out[i] & (gen[i] | in[i]);
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::LivenessDLong(Env env,
+ BitSetShortLongRep& in,
+ const BitSetShortLongRep def,
+ const BitSetShortLongRep use,
+ const BitSetShortLongRep out)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ in[i] = use[i] | (out[i] & ~def[i]);
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
/*BitSetTraits*/ BitSetTraits>::EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
{
assert(!IsShort(env));
@@ -762,13 +864,12 @@ const char* BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*BitSetTraits*/ BitSetTraits>::ToStringLong(Env env, BitSetShortLongRep bs)
{
assert(!IsShort(env));
- unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
- const int CharsForSizeT = sizeof(size_t) * 2;
- unsigned allocSz = len * CharsForSizeT + 4;
- unsigned remaining = allocSz;
- IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
- char* res = (char*)alloc->Alloc(allocSz);
- char* temp = res;
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ unsigned allocSz = len * CharsForSizeT + 4;
+ unsigned remaining = allocSz;
+ char* res = (char*)BitSetTraits::DebugAlloc(env, allocSz);
+ char* temp = res;
for (unsigned i = len; 0 < i; i--)
{
size_t bits = bs[i - 1];
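The DataFlowDLong and LivenessDLong loops above apply, per word of the long representation, the same formulas the short (single machine word) representation uses: out &= (gen | in) for the forward assertion data flow and in = use | (out & ~def) for liveness. A tiny illustrative sketch on plain 64-bit words (not part of the patch; the helper names are made up):

    #include <cstdint>

    // Forward dataflow merge used by assertion propagation: out &= (gen | in).
    inline void DataFlowWord(uint64_t& out, uint64_t gen, uint64_t in)
    {
        out &= (gen | in);
    }

    // Backward liveness transfer: in = use | (out & ~def).
    inline void LivenessWord(uint64_t& in, uint64_t def, uint64_t use, uint64_t out)
    {
        in = use | (out & ~def);
    }

Fusing the copy, union, and intersection into one pass is what lets the EndMerge code in assertionprop.cpp drop the temporary bit set it previously allocated for each merge.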
diff --git a/src/jit/block.h b/src/jit/block.h
index 786b83178f..752219bdb7 100644
--- a/src/jit/block.h
+++ b/src/jit/block.h
@@ -47,27 +47,28 @@ typedef BitVec_ValRet_T ASSERT_VALRET_TP;
* of the following enumeration.
*/
+// clang-format off
+
DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
{
- BBJ_EHFINALLYRET, // block ends with 'endfinally' (for finally or fault)
- BBJ_EHFILTERRET, // block ends with 'endfilter'
- BBJ_EHCATCHRET, // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
- BBJ_THROW, // block ends with 'throw'
- BBJ_RETURN, // block ends with 'ret'
-
- BBJ_NONE, // block flows into the next one (no jump)
-
- BBJ_ALWAYS, // block always jumps to the target
- BBJ_LEAVE, // block always jumps to the target, maybe out of guarded
- // region. Used temporarily until importing
- BBJ_CALLFINALLY, // block always calls the target finally
- BBJ_COND, // block conditionally jumps to the target
- BBJ_SWITCH, // block ends with a switch statement
-
- BBJ_COUNT
+ BBJ_EHFINALLYRET,// block ends with 'endfinally' (for finally or fault)
+ BBJ_EHFILTERRET, // block ends with 'endfilter'
+ BBJ_EHCATCHRET, // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
+ BBJ_THROW, // block ends with 'throw'
+ BBJ_RETURN, // block ends with 'ret'
+ BBJ_NONE, // block flows into the next one (no jump)
+ BBJ_ALWAYS, // block always jumps to the target
+ BBJ_LEAVE, // block always jumps to the target, maybe out of guarded region. Only used until importing.
+ BBJ_CALLFINALLY, // block always calls the target finally
+ BBJ_COND, // block conditionally jumps to the target
+ BBJ_SWITCH, // block ends with a switch statement
+
+ BBJ_COUNT
}
END_DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
+// clang-format on
+
struct GenTree;
struct GenTreeStmt;
struct BasicBlock;
@@ -377,66 +378,81 @@ struct BasicBlock : private LIR::Range
unsigned bbRefs; // number of blocks that can reach here, either by fall-through or a branch. If this falls to zero,
// the block is unreachable.
-#define BBF_VISITED 0x00000001 // BB visited during optimizations
-#define BBF_MARKED 0x00000002 // BB marked during optimizations
-#define BBF_CHANGED 0x00000004 // input/output of this block has changed
-#define BBF_REMOVED 0x00000008 // BB has been removed from bb-list
+// clang-format off
-#define BBF_DONT_REMOVE 0x00000010 // BB should not be removed during flow graph optimizations
-#define BBF_IMPORTED 0x00000020 // BB byte-code has been imported
-#define BBF_INTERNAL 0x00000040 // BB has been added by the compiler
+#define BBF_VISITED 0x00000001 // BB visited during optimizations
+#define BBF_MARKED 0x00000002 // BB marked during optimizations
+#define BBF_CHANGED 0x00000004 // input/output of this block has changed
+#define BBF_REMOVED 0x00000008 // BB has been removed from bb-list
+
+#define BBF_DONT_REMOVE 0x00000010 // BB should not be removed during flow graph optimizations
+#define BBF_IMPORTED 0x00000020 // BB byte-code has been imported
+#define BBF_INTERNAL 0x00000040 // BB has been added by the compiler
#define BBF_FAILED_VERIFICATION 0x00000080 // BB has verification exception
-#define BBF_TRY_BEG 0x00000100 // BB starts a 'try' block
-#define BBF_FUNCLET_BEG 0x00000200 // BB is the beginning of a funclet
-#define BBF_HAS_NULLCHECK 0x00000400 // BB contains a null check
-#define BBF_NEEDS_GCPOLL 0x00000800 // This BB is the source of a back edge and needs a GC Poll
-
-#define BBF_RUN_RARELY 0x00001000 // BB is rarely run (catch clauses, blocks with throws etc)
-#define BBF_LOOP_HEAD 0x00002000 // BB is the head of a loop
-#define BBF_LOOP_CALL0 0x00004000 // BB starts a loop that sometimes won't call
-#define BBF_LOOP_CALL1 0x00008000 // BB starts a loop that will always call
-
-#define BBF_HAS_LABEL 0x00010000 // BB needs a label
-#define BBF_JMP_TARGET 0x00020000 // BB is a target of an implicit/explicit jump
-#define BBF_HAS_JMP 0x00040000 // BB executes a JMP instruction (instead of return)
-#define BBF_GC_SAFE_POINT 0x00080000 // BB has a GC safe point (a call). More abstractly, BB does not
- // require a (further) poll -- this may be because this BB has a
- // call, or, in some cases, because the BB occurs in a loop, and
- // we've determined that all paths in the loop body leading to BB
- // include a call.
-#define BBF_HAS_VTABREF 0x00100000 // BB contains reference of vtable
-#define BBF_HAS_IDX_LEN 0x00200000 // BB contains simple index or length expressions on an array local var.
-#define BBF_HAS_NEWARRAY 0x00400000 // BB contains 'new' of an array
-#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
+#define BBF_TRY_BEG 0x00000100 // BB starts a 'try' block
+#define BBF_FUNCLET_BEG 0x00000200 // BB is the beginning of a funclet
+#define BBF_HAS_NULLCHECK 0x00000400 // BB contains a null check
+#define BBF_NEEDS_GCPOLL 0x00000800 // This BB is the source of a back edge and needs a GC Poll
+
+#define BBF_RUN_RARELY 0x00001000 // BB is rarely run (catch clauses, blocks with throws etc)
+#define BBF_LOOP_HEAD 0x00002000 // BB is the head of a loop
+#define BBF_LOOP_CALL0 0x00004000 // BB starts a loop that sometimes won't call
+#define BBF_LOOP_CALL1 0x00008000 // BB starts a loop that will always call
+
+#define BBF_HAS_LABEL 0x00010000 // BB needs a label
+#define BBF_JMP_TARGET 0x00020000 // BB is a target of an implicit/explicit jump
+#define BBF_HAS_JMP 0x00040000 // BB executes a JMP instruction (instead of return)
+#define BBF_GC_SAFE_POINT 0x00080000 // BB has a GC safe point (a call). More abstractly, BB does not require a
+ // (further) poll -- this may be because this BB has a call, or, in some
+ // cases, because the BB occurs in a loop, and we've determined that all
+ // paths in the loop body leading to BB include a call.
+
+#define BBF_HAS_VTABREF 0x00100000 // BB contains reference of vtable
+#define BBF_HAS_IDX_LEN 0x00200000 // BB contains simple index or length expressions on an array local var.
+#define BBF_HAS_NEWARRAY 0x00400000 // BB contains 'new' of an array
+#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
-#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
- // non-exceptional flow. Because the ARM calling sequence for calling a
- // finally explicitly sets the return address to the finally target and jumps
- // to the finally, instead of using a call instruction, ARM needs this to
- // generate correct code at the finally target, to allow for proper stack
- // unwind from within a non-exceptional call to a finally.
-#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
-#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
-#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
- // BBJ_ALWAYS); see isBBCallAlwaysPair().
-#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
-
-#define BBF_COLD 0x10000000 // BB is cold
-#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
+
+#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
+ // non-exceptional flow. Because the ARM calling sequence for calling a
+ // finally explicitly sets the return address to the finally target and jumps
+ // to the finally, instead of using a call instruction, ARM needs this to
+ // generate correct code at the finally target, to allow for proper stack
+ // unwind from within a non-exceptional call to a finally.
+
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
+#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+ // BBJ_ALWAYS); see isBBCallAlwaysPair().
+#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
+
+#define BBF_COLD 0x10000000 // BB is cold
+#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
+
#ifdef LEGACY_BACKEND
-#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
-#else // !LEGACY_BACKEND
-#define BBF_IS_LIR 0x40000000 // Set if the basic block contains LIR (as opposed to HIR)
-#endif // LEGACY_BACKEND
-#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
- // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
- // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
- // finally.
-
-#define BBF_CLONED_FINALLY_BEGIN 0x100000000 // First block of a cloned finally region
-#define BBF_CLONED_FINALLY_END 0x200000000 // Last block of a cloned finally region
+
+#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
+
+#else // !LEGACY_BACKEND
+
+#define BBF_IS_LIR 0x40000000 // Set if the basic block contains LIR (as opposed to HIR)
+
+#endif // LEGACY_BACKEND
+
+#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+ // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+ // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+ // finally.
+
+#define BBF_CLONED_FINALLY_BEGIN 0x100000000 // First block of a cloned finally region
+#define BBF_CLONED_FINALLY_END 0x200000000 // Last block of a cloned finally region
+
+// clang-format on
+
+#define BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY 0x400000000 // Block is dominated by exceptional entry.
// Flags that relate blocks to loop structure.
@@ -518,22 +534,39 @@ struct BasicBlock : private LIR::Range
weight_t bbWeight; // The dynamic execution weight of this block
+ // getCalledCount -- get the value used to normalize weights for this method
+ weight_t getCalledCount(Compiler* comp);
+
// getBBWeight -- get the normalized weight of this block
- unsigned getBBWeight(Compiler* comp);
+ weight_t getBBWeight(Compiler* comp);
+
+ // hasProfileWeight -- Returns true if this block's weight came from profile data
+ bool hasProfileWeight() const
+ {
+ return ((this->bbFlags & BBF_PROF_WEIGHT) != 0);
+ }
- // setBBWeight -- if the block weight is not derived from a profile, then set the weight to the input
- // weight, but make sure to not overflow BB_MAX_WEIGHT
- void setBBWeight(unsigned weight)
+ // setBBWeight -- if the block weight is not derived from a profile,
+ // then set the weight to the input weight, making sure to not overflow BB_MAX_WEIGHT
+    //                Note: to set the weight from profile data, use setBBProfileWeight instead
+ void setBBWeight(weight_t weight)
{
- if (!(this->bbFlags & BBF_PROF_WEIGHT))
+ if (!hasProfileWeight())
{
this->bbWeight = min(weight, BB_MAX_WEIGHT);
}
}
+ // setBBProfileWeight -- Set the profile-derived weight for a basic block
+ void setBBProfileWeight(unsigned weight)
+ {
+ this->bbFlags |= BBF_PROF_WEIGHT;
+ this->bbWeight = weight;
+ }
+
// modifyBBWeight -- same as setBBWeight, but also make sure that if the block is rarely run, it stays that
// way, and if it's not rarely run then its weight never drops below 1.
- void modifyBBWeight(unsigned weight)
+ void modifyBBWeight(weight_t weight)
{
if (this->bbWeight != BB_ZERO_WEIGHT)
{
@@ -541,20 +574,12 @@ struct BasicBlock : private LIR::Range
}
}
- // setBBProfileWeight -- Set the profile-derived weight for a basic block
- void setBBProfileWeight(unsigned weight)
- {
- this->bbFlags |= BBF_PROF_WEIGHT;
- // Check if the multiplication by BB_UNITY_WEIGHT will overflow.
- this->bbWeight = (weight <= BB_MAX_WEIGHT / BB_UNITY_WEIGHT) ? weight * BB_UNITY_WEIGHT : BB_MAX_WEIGHT;
- }
-
// this block will inherit the same weight and relevant bbFlags as bSrc
void inheritWeight(BasicBlock* bSrc)
{
this->bbWeight = bSrc->bbWeight;
- if (bSrc->bbFlags & BBF_PROF_WEIGHT)
+ if (bSrc->hasProfileWeight())
{
this->bbFlags |= BBF_PROF_WEIGHT;
}
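
Since this hunk reshapes the block-weight API (weights become weight_t, profile checks go through hasProfileWeight(), and setBBProfileWeight no longer scales by BB_UNITY_WEIGHT), a hedged usage sketch may help; the wrapper function and its counts are purely illustrative:

    // Illustrative only: set a profile-derived weight, then show that the
    // non-profile setter respects it (setBBWeight is a no-op once
    // BBF_PROF_WEIGHT is set via setBBProfileWeight or inheritWeight).
    void ExampleWeightUpdate(BasicBlock* block, BasicBlock* clone, unsigned profileCount)
    {
        block->setBBProfileWeight(profileCount); // sets BBF_PROF_WEIGHT and bbWeight
        clone->inheritWeight(block);             // copies bbWeight and the profile flag
        clone->setBBWeight(BB_ZERO_WEIGHT);      // ignored: the clone now carries profile data
    }
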
@@ -868,12 +893,6 @@ struct BasicBlock : private LIR::Range
unsigned bbDfsNum; // The index of this block in DFS reverse post order
// relative to the flow graph.
-#if ASSERTION_PROP
- // A set of blocks which dominate this one *except* the normal entry block. This is lazily initialized
- // and used only by Assertion Prop, intersected with fgEnterBlks!
- BlockSet bbDoms;
-#endif
-
IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block
IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd)
// range is not inclusive of the end offset. The count of IL bytes in the block
@@ -945,12 +964,6 @@ struct BasicBlock : private LIR::Range
};
union {
-#if ASSERTION_PROP
- ASSERT_TP bbAssertionKill; // value assignments killed by block
-#endif
- };
-
- union {
EXPSET_TP bbCseIn; // CSEs available on entry
#if ASSERTION_PROP
ASSERT_TP bbAssertionIn; // value assignments available on entry
@@ -1050,7 +1063,6 @@ struct BasicBlock : private LIR::Range
GenTreeStmt* firstStmt() const;
GenTreeStmt* lastStmt() const;
- GenTreeStmt* lastTopLevelStmt();
GenTree* firstNode();
GenTree* lastNode();
@@ -1074,13 +1086,7 @@ struct BasicBlock : private LIR::Range
GenTree* FirstNonPhiDefOrCatchArgAsg();
BasicBlock()
- :
-#if ASSERTION_PROP
- BLOCKSET_INIT_NOCOPY(bbDoms, BlockSetOps::UninitVal())
- ,
-#endif // ASSERTION_PROP
- VARSET_INIT_NOCOPY(bbLiveIn, VarSetOps::UninitVal())
- , VARSET_INIT_NOCOPY(bbLiveOut, VarSetOps::UninitVal())
+ : VARSET_INIT_NOCOPY(bbLiveIn, VarSetOps::UninitVal()), VARSET_INIT_NOCOPY(bbLiveOut, VarSetOps::UninitVal())
{
}
@@ -1167,6 +1173,16 @@ public:
void MakeLIR(GenTree* firstNode, GenTree* lastNode);
bool IsLIR();
+
+ void SetDominatedByExceptionalEntryFlag()
+ {
+ bbFlags |= BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY;
+ }
+
+ bool IsDominatedByExceptionalEntryFlag()
+ {
+ return (bbFlags & BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY) != 0;
+ }
};
template <>
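
To show how the pair of accessors added at the end of the struct works together with the new BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY flag, a small hedged sketch; the condition and the caller are hypothetical:

    // Illustrative only: tag blocks reached only through an exceptional entry,
    // then query the flag later without touching bbFlags directly.
    void ExampleMarkAndQuery(BasicBlock* block, bool dominatedByExceptionalEntry /* hypothetical input */)
    {
        if (dominatedByExceptionalEntry)
        {
            block->SetDominatedByExceptionalEntryFlag();
        }

        if (block->IsDominatedByExceptionalEntryFlag())
        {
            // e.g. treat locals defined here more conservatively
        }
    }
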
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 090283ee50..e50e6405e0 100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -162,6 +162,36 @@ private:
//
unsigned genStackLevel;
+ void SubtractStackLevel(unsigned adjustment)
+ {
+ assert(genStackLevel >= adjustment);
+ unsigned newStackLevel = genStackLevel - adjustment;
+ if (genStackLevel != newStackLevel)
+ {
+ JITDUMP("Adjusting stack level from %d to %d\n", genStackLevel, newStackLevel);
+ }
+ genStackLevel = newStackLevel;
+ }
+
+ void AddStackLevel(unsigned adjustment)
+ {
+ unsigned newStackLevel = genStackLevel + adjustment;
+ if (genStackLevel != newStackLevel)
+ {
+ JITDUMP("Adjusting stack level from %d to %d\n", genStackLevel, newStackLevel);
+ }
+ genStackLevel = newStackLevel;
+ }
+
+ void SetStackLevel(unsigned newStackLevel)
+ {
+ if (genStackLevel != newStackLevel)
+ {
+ JITDUMP("Setting stack level from %d to %d\n", genStackLevel, newStackLevel);
+ }
+ genStackLevel = newStackLevel;
+ }
+
#if STACK_PROBES
// Stack Probes
bool genNeedPrologStackProbe;
@@ -416,20 +446,30 @@ protected:
void genPrologPadForReJit();
+ // clang-format off
void genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base = REG_NA,
- bool isJump = false,
- bool isNoGC = false);
-
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ void* addr
+ X86_ARG(ssize_t argSize),
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base = REG_NA,
+ bool isJump = false,
+ bool isNoGC = false);
+ // clang-format on
+
+ // clang-format off
void genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset);
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ GenTreeIndir* indir
+ X86_ARG(ssize_t argSize),
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset);
+ // clang-format on
//
// Epilog functions
@@ -470,6 +510,9 @@ protected:
void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed);
void genUpdateCurrentFunclet(BasicBlock* block);
+#if defined(_TARGET_ARM_)
+ void genInsertNopForUnwinder(BasicBlock* block);
+#endif
#else // FEATURE_EH_FUNCLETS
@@ -479,6 +522,13 @@ protected:
return;
}
+#if defined(_TARGET_ARM_)
+ void genInsertNopForUnwinder(BasicBlock* block)
+ {
+ return;
+ }
+#endif
+
#endif // FEATURE_EH_FUNCLETS
void genGeneratePrologsAndEpilogs();
@@ -810,8 +860,8 @@ public:
instruction ins, regNumber reg, TempDsc* tmp, unsigned ofs, var_types type, emitAttr size = EA_UNKNOWN);
void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs);
- void instEmit_indCall(GenTreePtr call,
- size_t argSize,
+ void instEmit_indCall(GenTreeCall* call,
+ size_t argSize,
emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
void instEmit_RM(instruction ins, GenTreePtr tree, GenTreePtr addr, unsigned offs);
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 81f5889e3f..41bd8040ac 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -24,85 +24,47 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
//------------------------------------------------------------------------
-// genSetRegToIcon: Generate code that will set the given register to the integer constant.
-//
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- // Reg cannot be a FP reg
- assert(!genIsValidFloatReg(reg));
-
- // The only TYP_REF constant that can come this path is a managed 'null' since it is not
- // relocatable. Other ref type constants (e.g. string objects) go through a different
- // code path.
- noway_assert(type != TYP_REF || val == 0);
-
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
-}
-
-//------------------------------------------------------------------------
-// genEmitGSCookieCheck: Generate code to check that the GS cookie wasn't thrashed by a buffer overrun.
-//
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- NYI("ARM genEmitGSCookieCheck");
-}
-
-//------------------------------------------------------------------------
// genCallFinally: Generate a call to the finally block.
//
BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
- NYI("ARM genCallFinally");
- return block;
+ BasicBlock* bbFinallyRet = nullptr;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ assert(!(block->bbFlags & BBF_RETLESS_CALL));
+ assert(block->isBBCallAlwaysPair());
+ return block->bbNext;
}
//------------------------------------------------------------------------
// genEHCatchRet:
void CodeGen::genEHCatchRet(BasicBlock* block)
{
- NYI("ARM genEHCatchRet");
-}
-
-//---------------------------------------------------------------------
-// genIntrinsic - generate code for a given intrinsic
-//
-// Arguments
-// treeNode - the GT_INTRINSIC node
-//
-// Return value:
-// None
-//
-void CodeGen::genIntrinsic(GenTreePtr treeNode)
-{
- // Both operand and its result must be of the same floating point type.
- GenTreePtr srcNode = treeNode->gtOp.gtOp1;
- assert(varTypeIsFloating(srcNode));
- assert(srcNode->TypeGet() == treeNode->TypeGet());
-
- // Right now only Abs/Round/Sqrt are treated as math intrinsics.
- //
- switch (treeNode->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Abs:
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(INS_vabs, emitTypeSize(treeNode), treeNode, srcNode);
- break;
-
- case CORINFO_INTRINSIC_Round:
- NYI_ARM("genIntrinsic for round - not implemented yet");
- break;
-
- case CORINFO_INTRINSIC_Sqrt:
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(INS_vsqrt, emitTypeSize(treeNode), treeNode, srcNode);
- break;
-
- default:
- assert(!"genIntrinsic: Unsupported intrinsic");
- unreached();
- }
-
- genProduceReg(treeNode);
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET);
}
//------------------------------------------------------------------------
@@ -243,6 +205,131 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
}
//------------------------------------------------------------------------
+// genCodeForBinary: Generate code for many binary arithmetic operators
+// This method is expected to have called genConsumeOperands() before calling it.
+//
+// Arguments:
+// treeNode - The binary operation for which we are generating code.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Mul and div are not handled here.
+// See the assert below for the operators that are handled.
+
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_LO || oper == GT_ADD_HI || oper == GT_SUB_LO ||
+ oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND);
+
+ if ((oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI) && treeNode->gtOverflow())
+ {
+ // This is also checked in the importer.
+ NYI("Overflow not yet implemented");
+ }
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+
+ instruction ins = genGetInsForOper(oper, targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ if ((oper == GT_ADD_LO || oper == GT_SUB_LO))
+ {
+ // During decomposition, all operands become reg
+ assert(!op1->isContained() && !op2->isContained());
+ emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, op2->gtRegNum,
+ INS_FLAGS_SET);
+ }
+ else
+ {
+ regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+ assert(r == targetReg);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loRetVal = op1->gtGetOp1();
+ GenTree* hiRetVal = op1->gtGetOp2();
+ noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
+
+ genConsumeReg(loRetVal);
+ genConsumeReg(hiRetVal);
+ if (loRetVal->gtRegNum != REG_LNGRET_LO)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
+ }
+ if (hiRetVal->gtRegNum != REG_LNGRET_HI)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
+ }
+ }
+ else
+ {
+ if (varTypeIsStruct(treeNode))
+ {
+ NYI_ARM("struct return");
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ }
+}
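
The TYP_LONG path above returns the value in a register pair (REG_LNGRET_LO / REG_LNGRET_HI). A hedged sketch of the split it relies on, as pure arithmetic with no JIT types:

    #include <cstdint>

    // Illustrative only: the lo/hi halves carried by REG_LNGRET_LO / REG_LNGRET_HI.
    void SplitLong(uint64_t value, uint32_t* lo, uint32_t* hi)
    {
        *lo = (uint32_t)(value & 0xFFFFFFFFu);
        *hi = (uint32_t)(value >> 32);
    }
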
+
+//------------------------------------------------------------------------
// genCodeForTreeNode Generate code for a single node in the tree.
//
// Preconditions:
@@ -256,11 +343,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#ifdef DEBUG
lastConsumedNode = nullptr;
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ compiler->gtDispLIRNode(treeNode, "Generating: ");
+ }
#endif
- JITDUMP("Generating: ");
- DISPNODE(treeNode);
-
// contained nodes are part of their parents for codegen purposes
// ex : immediates, most LEAs
if (treeNode->isContained())
@@ -270,6 +359,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
switch (treeNode->gtOper)
{
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
case GT_CNS_INT:
case GT_CNS_DBL:
genSetRegToConst(targetReg, targetType, treeNode);
@@ -313,12 +406,22 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
assert(varTypeIsIntegralOrI(treeNode));
__fallthrough;
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
case GT_ADD:
case GT_SUB:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
case GT_MUL:
{
+ genConsumeOperands(treeNode->AsOp());
+
const genTreeOps oper = treeNode->OperGet();
- if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow())
+ if (treeNode->gtOverflow())
{
// This is also checked in the importer.
NYI("Overflow not yet implemented");
@@ -331,56 +434,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// The arithmetic node must be sitting in a register (since it's not contained)
noway_assert(targetReg != REG_NA);
- regNumber op1reg = op1->gtRegNum;
- regNumber op2reg = op2->gtRegNum;
-
- GenTreePtr dst;
- GenTreePtr src;
-
- genConsumeIfReg(op1);
- genConsumeIfReg(op2);
-
- if (!varTypeIsFloating(targetType))
- {
- // This is the case of reg1 = reg1 op reg2
- // We're ready to emit the instruction without any moves
- if (op1reg == targetReg)
- {
- dst = op1;
- src = op2;
- }
- // We have reg1 = reg2 op reg1
- // In order for this operation to be correct
- // we need that op is a commutative operation so
- // we can convert it into reg1 = reg1 op reg2 and emit
- // the same code as above
- else if (op2reg == targetReg)
- {
- assert(GenTree::OperIsCommutative(treeNode->OperGet()));
- dst = op2;
- src = op1;
- }
- // dest, op1 and op2 registers are different:
- // reg3 = reg1 op reg2
- // We can implement this by issuing a mov:
- // reg3 = reg1
- // reg3 = reg3 op reg2
- else
- {
- inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
- regTracker.rsTrackRegCopy(targetReg, op1reg);
- gcInfo.gcMarkRegPtrVal(targetReg, targetType);
- dst = treeNode;
- src = op2;
- }
-
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
- assert(r == targetReg);
- }
- else
- {
- emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), targetReg, op1reg, op2reg);
- }
+ regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+ assert(r == targetReg);
}
genProduceReg(treeNode);
break;
@@ -388,18 +443,19 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
+ case GT_ROR:
genCodeForShift(treeNode);
- // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ genCodeForShiftLong(treeNode);
break;
case GT_CAST:
// Cast is never contained (?)
noway_assert(targetReg != REG_NA);
- // Overflow conversions from float/double --> int types go through helper calls.
- if (treeNode->gtOverflow() && !varTypeIsFloating(treeNode->gtOp.gtOp1))
- NYI("Unimplmented GT_CAST:int <--> int with overflow");
-
if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
{
// Casts float/double <--> double/float
@@ -480,45 +536,110 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STORE_LCL_FLD:
{
- NYI_IF(targetType == TYP_STRUCT, "GT_STORE_LCL_FLD: struct store local field not supported");
+ noway_assert(targetType != TYP_STRUCT);
+
+ // record the offset
+ unsigned offset = treeNode->gtLclFld.gtLclOffs;
+
+ // We must have a stack store with GT_STORE_LCL_FLD
noway_assert(!treeNode->InReg());
+ noway_assert(targetReg == REG_NA);
- GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
- genConsumeIfReg(op1);
- emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ NYI_ARM("st.lclFld contained operand");
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ emit->emitIns_S_R(ins, attr, data->gtRegNum, varNum, offset);
+ }
+
+ genUpdateLife(varNode);
+ varDsc->lvRegNum = REG_STK;
}
break;
case GT_STORE_LCL_VAR:
{
- NYI_IF(targetType == TYP_STRUCT, "struct store local not supported");
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
- GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
- genConsumeIfReg(op1);
- if (treeNode->gtRegNum == REG_NA)
- {
- // stack store
- emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode);
- compiler->lvaTable[treeNode->AsLclVarCommon()->gtLclNum].lvRegNum = REG_STK;
- }
- else if (op1->isContained())
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ unsigned offset = 0;
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+
+            // The "var = call" case, where the call returns a multi-reg value,
+            // is handled separately.
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
- // and this would be a case where, once that's done, we need to mark that node as always
- // requiring a register - which we always assume now anyway, but once we "optimize" that
- // we'll have to take cases like this into account.
- assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
- genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ genMultiRegCallStoreToLocal(treeNode);
+ break;
}
- else if (op1->gtRegNum != treeNode->gtRegNum)
+ else
{
- assert(op1->gtRegNum != REG_NA);
- emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
+
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ NYI_ARM("st.lclVar contained operand");
+ }
+ else
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ if (targetReg == REG_NA) // store into stack based LclVar
+ {
+ inst_set_SV_var(varNode);
+
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(varNode);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (dataReg != targetReg)
+ {
+ // Assign into targetReg when dataReg (from op1) is not the same register
+ inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
+ }
+ genProduceReg(treeNode);
+ }
}
- if (treeNode->gtRegNum != REG_NA)
- genProduceReg(treeNode);
}
break;
@@ -535,26 +656,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
__fallthrough;
case GT_RETURN:
- {
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- if (targetType == TYP_VOID)
- {
- assert(op1 == nullptr);
- break;
- }
- assert(op1 != nullptr);
- op1 = op1->gtEffectiveVal();
-
- NYI_IF(op1->gtRegNum == REG_NA, "GT_RETURN: return of a value not in register");
- genConsumeReg(op1);
-
- regNumber retReg = varTypeIsFloating(op1) ? REG_FLOATRET : REG_INTRET;
- if (op1->gtRegNum != retReg)
- {
- inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
- }
- }
- break;
+ genReturn(treeNode);
+ break;
case GT_LEA:
{
@@ -568,7 +671,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_IND:
genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir());
genProduceReg(treeNode);
break;
@@ -652,6 +755,22 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// vmrs with register 0xf has special meaning of transferring flags
emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
}
+ else if (varTypeIsLong(op1))
+ {
+#ifdef DEBUG
+ // The result of an unlowered long compare on a 32-bit target must either be
+ // a) materialized into a register, or
+ // b) unused.
+ //
+ // A long compare that has a result that is used but not materialized into a register should
+ // have been handled by Lowering::LowerCompare.
+
+ LIR::Use use;
+ assert((treeNode->gtRegNum != REG_NA) || !LIR::AsRange(compiler->compCurBB).TryGetUse(treeNode, &use));
+#endif
+ genCompareLong(treeNode);
+ break;
+ }
else
{
var_types op1Type = op1->TypeGet();
@@ -702,6 +821,19 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
break;
+ case GT_JCC:
+ {
+ GenTreeJumpCC* jcc = treeNode->AsJumpCC();
+
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ CompareKind compareKind = ((jcc->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jumpKind = genJumpKindForOper(jcc->gtCondition, compareKind);
+
+ inst_JMP(jumpKind, compiler->compCurBB->bbJumpDest);
+ }
+ break;
+
case GT_RETURNTRAP:
{
// this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
@@ -785,30 +917,14 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genConsumeAddress(addr);
}
- emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(storeInd), data->gtRegNum,
+ treeNode->AsIndir());
}
}
break;
case GT_COPY:
- {
- assert(treeNode->gtOp.gtOp1->IsLocal());
- GenTreeLclVarCommon* lcl = treeNode->gtOp.gtOp1->AsLclVarCommon();
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
- inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, genConsumeReg(treeNode->gtOp.gtOp1), targetType,
- emitTypeSize(targetType));
-
- // The old location is dying
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(treeNode->gtOp.gtOp1));
-
- gcInfo.gcMarkRegSetNpt(genRegMask(treeNode->gtOp.gtOp1->gtRegNum));
-
- genUpdateVarReg(varDsc, treeNode);
-
- // The new location is going live
- genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
- }
- genProduceReg(treeNode);
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
break;
case GT_LIST:
@@ -818,33 +934,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_PUTARG_STK:
- {
- NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_STK: struct support not implemented");
-
- // Get argument offset on stack.
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
-#endif
-
- GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
- if (data->isContained())
- {
- emit->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), compiler->lvaOutgoingArgSpaceVar,
- argOffset, (int)data->AsIntConCommon()->IconValue());
- }
- else
- {
- genConsumeReg(data);
- emit->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum,
- compiler->lvaOutgoingArgSpaceVar, argOffset);
- }
- }
- break;
+ genPutArgStk(treeNode->AsPutArgStk());
+ break;
case GT_PUTARG_REG:
{
@@ -863,7 +954,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_CALL:
- genCallInstruction(treeNode);
+ genCallInstruction(treeNode->AsCall());
break;
case GT_LOCKADD:
@@ -872,6 +963,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genLockedInstructions(treeNode->AsOp());
break;
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
case GT_CMPXCHG:
{
NYI("GT_CMPXCHG");
@@ -944,7 +1039,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LABEL:
genPendingCallLabel = genCreateTempLabel();
treeNode->gtLabel.gtLabBB = genPendingCallLabel;
- emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
+ emit->emitIns_J_R(INS_adr, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
break;
case GT_CLS_VAR_ADDR:
@@ -952,6 +1047,27 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genProduceReg(treeNode);
break;
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ genCodeForStoreBlk(treeNode->AsBlk());
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
case GT_IL_OFFSET:
// Do nothing; these nodes are simply markers for debug info.
break;
@@ -960,7 +1076,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
#ifdef DEBUG
char message[256];
- _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n",
+ _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
GenTree::NodeName(treeNode->OperGet()));
NYIRAW(message);
#else
@@ -982,70 +1098,372 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
NYI("genLockedInstructions");
}
-//------------------------------------------------------------------------
-// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
+//--------------------------------------------------------------------------------------
+// genLclHeap: Generate code for localloc
+//
+// Description:
+//      There are two ways to generate code for localloc, depending on the build:
+//          1) For builds where the memory must be zero-initialized (e.g. debug builds),
+//             we generate a loop that executes push {tmpReg} N times.
+//          2) For optimized builds we instead tickle the pages, to ensure that SP is always
+//             valid and stays in sync with the "stack guard page"; the number of iterations
+//             is N/PAGE_SIZE.
//
-void CodeGen::genRangeCheck(GenTreePtr oper)
+// Comments:
+//      Several optimizations are possible:
+//          1) No loop is needed for a zero-size allocation.
+//          2) For small allocations (at most four stores) we unroll the loop.
+//          3) For allocations smaller than PAGE_SIZE, when the memory does not need to be
+//             initialized to zero, we can simply adjust SP.
+//
+// Notes: Size N should be aligned to STACK_ALIGN before any allocation
+//
+void CodeGen::genLclHeap(GenTreePtr tree)
{
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+ assert(tree->OperGet() == GT_LCLHEAP);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ // Result of localloc will be returned in regCnt.
+    // It is also used as a temporary register in the generated code
+    // to hold the allocation size.
+ regNumber regCnt = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber pspSymReg = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+ BasicBlock* loop = nullptr;
+ unsigned stackAdjustment = 0;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ // Whether method has PSPSym.
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+    // Check for zero-size allocations
+ // size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
- GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal();
- GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
- GenTreePtr arrRef = NULL;
- int lenOffset = 0;
+ // If amount is zero then return null in regCnt
+ size_t amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+ goto BAILOUT;
+ }
+ }
+ else
+ {
+        // If the size is zero, bail out by returning null in regCnt
+ genConsumeRegAndCopy(size, regCnt);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_TEST, easz, regCnt, regCnt);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+ }
- genConsumeIfReg(arrIdx);
- genConsumeIfReg(arrLen);
+ stackAdjustment = 0;
+#if FEATURE_EH_FUNCLETS
+    // If we have a PSPSym, then we need to relocate it after the localloc.
+ if (hasPspSym)
+ {
+ stackAdjustment += STACK_ALIGN;
+
+ // Save a copy of PSPSym
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~pspSymRegMask;
+ pspSymReg = genRegNumFromMask(pspSymRegMask);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
- GenTree * src1, *src2;
- emitJumpKind jmpKind;
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
- if (arrIdx->isContainedIntOrIImmed())
+    // Put the aligned allocation size into regCnt
+ if (size->IsCnsIntOrI())
{
- // To encode using a cmp immediate, we place the
- // constant operand in the second position
- src1 = arrLen;
- src2 = arrIdx;
- jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+        // 'amount' is the total number of bytes to localloc; round it up to STACK_ALIGN
+ size_t amount = size->gtIntCon.gtIconVal;
+ amount = AlignUp(amount, STACK_ALIGN);
+
+        // For small allocations we will generate up to four push instructions
+ size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ while (cntStackAlignedWidthItems != 0)
+ {
+ inst_IV(INS_push, (unsigned)genRegMask(regCnt));
+ cntStackAlignedWidthItems -= 1;
+ }
+
+ goto ALLOC_DONE;
+ }
+ else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust the SP value
+ // The SP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regCnt, REG_SP, 0);
+ inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
+ goto ALLOC_DONE;
+ }
+
+ // regCnt will be the total number of bytes to locAlloc
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
}
else
{
- src1 = arrIdx;
- src2 = arrLen;
- jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ // Round up the number of bytes to allocate to a STACK_ALIGN boundary.
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+
+ // Allocation
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+        // Since we have to zero out the allocated memory AND ensure that SP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) >= 1);
+
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regNumber regTmp = genRegNumFromMask(regCntMask);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regTmp);
+
+ // Loop:
+ BasicBlock* loop = genCreateTempLabel();
+ genDefineTempLabel(loop);
+
+ noway_assert(STACK_ALIGN == 8);
+ inst_IV(INS_push, (unsigned)genRegMask(regTmp));
+ inst_IV(INS_push, (unsigned)genRegMask(regTmp));
+
+ // If not done, loop
+ // Note that regCnt is the number of bytes to stack allocate.
+ assert(genIsValidIntReg(regCnt));
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regCnt, regCnt, STACK_ALIGN);
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that SP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case SP is on the last byte of the guard page. Thus you must
+ // touch SP+0 first not SP+0x1000.
+ //
+ // Another subtlety is that you don't want SP to be exactly on the
+ // boundary of the guard page because PUSH is predecrement, thus
+ // call setup would not touch the guard page but just beyond it
+ //
+ // Note that we go through a few hoops so that SP never points to
+ // illegal pages at any time during the ticking process
+ //
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+        //      jb    Loop                    // result is smaller than original SP (no wrap around)
+ // mov regCnt, #0 // Overflow, pick lowest possible value
+ //
+ // Loop:
+ // ldr regTmp, [SP + 0] // tickle the page - read from the page
+ // sub regTmp, SP, PAGE_SIZE // decrement SP by PAGE_SIZE
+ // cmp regTmp, regCnt
+ // jb Done
+ // mov SP, regTmp
+ // j Loop
+ //
+ // Done:
+ // mov SP, regCnt
+ //
+
+ // Setup the regTmp
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ BasicBlock* loop = genCreateTempLabel();
+ BasicBlock* done = genCreateTempLabel();
+
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
+
+ inst_JMP(EJ_vc, loop); // branch if the V flag is not set
+
+    // Oops... Overflow; set regCnt to the lowest possible value
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTmp, REG_SPBASE, 0);
+
+ // decrement SP by PAGE_SIZE
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
+ emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
+ inst_JMP(jmpLTU, done);
+
+ // Update SP to be at the next page of stack that we will tickle
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+
+ // Jump to loop and tickle new stack address
+ inst_JMP(EJ_jmp, loop);
+
+ // Done with stack tickle loop
+ genDefineTempLabel(done);
+
+ // Now just move the final value to SP
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+ }
+
+ALLOC_DONE:
+    // Re-adjust SP to allocate the PSPSym and the outgoing arg area
+ if (stackAdjustment != 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert(stackAdjustment > 0);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
+
+#if FEATURE_EH_FUNCLETS
+ // Write PSPSym to its new location.
+ if (hasPspSym)
+ {
+ assert(genIsValidIntReg(pspSymReg));
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+        // Return the stackalloc'ed address in the result register.
+        // regCnt = SP + stackAdjustment.
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regCnt, REG_SPBASE, (int)stackAdjustment);
+ }
+ else // stackAdjustment == 0
+ {
+ // Move the final value of SP to regCnt
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
+ }
+
+BAILOUT:
+ if (endLabel != nullptr)
+ genDefineTempLabel(endLabel);
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ if (compiler->lvaLocAllocSPvar != BAD_VAR_NUM)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regCnt, compiler->lvaLocAllocSPvar, 0);
+ }
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regCnt, compiler->lvaReturnEspCheck, 0);
}
+#endif
- getEmitter()->emitInsBinary(INS_cmp, emitAttr(TYP_INT), src1, src2);
- genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ genProduceReg(tree);
}
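
For orientation, a hedged sketch of the sizing arithmetic genLclHeap relies on for a constant-size localloc: the requested size is rounded up to the stack alignment, and the non-initializing path touches roughly one page per PAGE_SIZE of allocation. The constants below are assumptions for the sketch; the JIT uses STACK_ALIGN and Compiler::eeGetPageSize().

    #include <cstddef>

    const size_t kStackAlign = 8;    // ARM32 stack alignment assumed for this sketch
    const size_t kPageSize   = 4096; // typical page size assumed for this sketch

    // Illustrative only: AlignUp(requested, STACK_ALIGN)
    size_t AlignedLocAllocSize(size_t requested)
    {
        return (requested + kStackAlign - 1) & ~(kStackAlign - 1);
    }

    // Illustrative only: iterations of the guard-page tickle loop
    size_t PageTouchesNeeded(size_t alignedSize)
    {
        return (alignedSize + kPageSize - 1) / kPageSize;
    }
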
//------------------------------------------------------------------------
-// indirForm: Make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen.
+// genTableBasedSwitch: generate code for a switch statement based on a table of ip-relative offsets
//
-GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
{
- GenTreeIndir i(GT_IND, type, base, nullptr);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ getEmitter()->emitIns_R_ARX(INS_ldr, EA_4BYTE, REG_PC, baseReg, idxReg, TARGET_POINTER_SIZE, 0);
}
//------------------------------------------------------------------------
-// intForm: Make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate.
+// genJumpTable: emits the table and an instruction to get the address of the first element
//
-GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+void CodeGen::genJumpTable(GenTree* treeNode)
{
- GenTreeIntCon i(type, value);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, false);
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+ getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, treeNode->gtRegNum);
+ getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, treeNode->gtRegNum);
+
+ genProduceReg(treeNode);
}
//------------------------------------------------------------------------
@@ -1096,6 +1514,27 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_XOR:
ins = INS_XOR;
break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_ADD_LO:
+ ins = INS_add;
+ break;
+ case GT_ADD_HI:
+ ins = INS_adc;
+ break;
+ case GT_SUB_LO:
+ ins = INS_sub;
+ break;
+ case GT_SUB_HI:
+ ins = INS_sbc;
+ break;
+ case GT_LSH_HI:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_RSH_LO:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
default:
unreached();
break;
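
The new GT_ADD_LO/GT_ADD_HI and GT_SUB_LO/GT_SUB_HI mappings back the 32-bit decomposition of 64-bit arithmetic: the low-half instruction sets the carry/borrow and the high-half instruction consumes it via adc/sbc. A hedged sketch of that carry flow in plain C++:

    #include <cstdint>

    // Illustrative only: what the adds/adc pair computes for a decomposed 64-bit add.
    uint64_t Add64ViaHalves(uint32_t loA, uint32_t hiA, uint32_t loB, uint32_t hiB)
    {
        uint32_t lo    = loA + loB;
        uint32_t carry = (lo < loA) ? 1u : 0u; // the carry flag produced by 'adds'
        uint32_t hi    = hiA + hiB + carry;    // what 'adc' folds back in
        return ((uint64_t)hi << 32) | lo;
    }
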
@@ -1103,357 +1542,303 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
return ins;
}
-//------------------------------------------------------------------------
-// genCodeForShift: Generates the code sequence for a GenTree node that
-// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
-//
-// Arguments:
-// tree - the bit shift node (that specifies the type of bit shift to perform).
-//
-// Assumptions:
-// a) All GenTrees are register allocated.
-//
-void CodeGen::genCodeForShift(GenTreePtr tree)
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
{
- var_types targetType = tree->TypeGet();
- genTreeOps oper = tree->OperGet();
- instruction ins = genGetInsForOper(oper, targetType);
- emitAttr size = emitTypeSize(tree);
+ NYI_ARM("genCodeForCpBlkUnroll");
+}
- assert(tree->gtRegNum != REG_NA);
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+ NYI_ARM("genCodeForInitBlkUnroll");
+}
- GenTreePtr operand = tree->gtGetOp1();
- genConsumeReg(operand);
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
+{
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = blkOp->OperIsCopyBlkOp();
- GenTreePtr shiftBy = tree->gtGetOp2();
- if (!shiftBy->IsCnsIntOrI())
+ switch (blkOp->gtBlkOpKind)
{
- genConsumeReg(shiftBy);
- getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlk(blkOp);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(blkOp);
+ }
+ break;
+ default:
+ unreached();
}
- else
+ if (blkOp->gtBlkOpGcUnsafe)
{
- unsigned immWidth = size * BITS_PER_BYTE;
- ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
-
- getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ getEmitter()->emitEnableGC();
}
-
- genProduceReg(tree);
}
//------------------------------------------------------------------------
-// genRegCopy: Generate a register copy.
+// genCodeForShiftLong: Generates the code sequence for a GenTree node that
+// represents a three operand bit shift or rotate operation (<<Hi, >>Lo).
//
-void CodeGen::genRegCopy(GenTree* treeNode)
-{
- NYI("genRegCopy");
-}
-
-//------------------------------------------------------------------------
-// genCallInstruction: Produce code for a GT_CALL node
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is a contained constant
//
-void CodeGen::genCallInstruction(GenTreePtr node)
+void CodeGen::genCodeForShiftLong(GenTreePtr tree)
{
- GenTreeCall* call = node->AsCall();
-
- assert(call->gtOper == GT_CALL);
-
- gtCallTypes callType = (gtCallTypes)call->gtCallType;
+ // Only the non-RMW case here.
+ genTreeOps oper = tree->OperGet();
+ assert(oper == GT_LSH_HI || oper == GT_RSH_LO);
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+ GenTree* operand = tree->gtOp.gtOp1;
+ assert(operand->OperGet() == GT_LONG);
+ assert(operand->gtOp.gtOp1->isUsedFromReg());
+ assert(operand->gtOp.gtOp2->isUsedFromReg());
- // all virtuals should have been expanded into a control expression
- assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
-
- // Consume all the arg regs
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
+ GenTree* operandLo = operand->gtGetOp1();
+ GenTree* operandHi = operand->gtGetOp2();
- GenTreePtr argNode = list->Current();
+ regNumber regLo = operandLo->gtRegNum;
+ regNumber regHi = operandHi->gtRegNum;
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
- assert(curArgTabEntry);
+ genConsumeOperands(tree->AsOp());
- if (curArgTabEntry->regNum == REG_STK)
- continue;
-
- // Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_FIELD_LIST)
- {
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- regNumber argReg = curArgTabEntry->regNum;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
- {
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(oper, targetType);
- genConsumeReg(putArgRegNode);
+ GenTreePtr shiftBy = tree->gtGetOp2();
- if (putArgRegNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
- putArgRegNode->gtRegNum);
- }
+ assert(shiftBy->isContainedIntOrIImmed());
- argReg = genRegArgNext(argReg);
- }
- }
- else
- {
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
- }
- }
+ unsigned int count = shiftBy->AsIntConCommon()->IconValue();
- // In the case of a varargs call,
- // the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the
- // integer and floating point registers so, let's do that.
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- NYI_ARM("CodeGen - IsVarargs");
- }
- }
+ regNumber regResult = (oper == GT_LSH_HI) ? regHi : regLo;
- // Insert a null check on "this" pointer if asked.
- if (call->NeedsNullCheck())
+ if (regResult != tree->gtRegNum)
{
- const regNumber regThis = genGetThisArgReg(call);
- const regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
+ inst_RV_RV(INS_mov, tree->gtRegNum, regResult, targetType);
}
- // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
- CORINFO_METHOD_HANDLE methHnd;
- GenTree* target = call->gtControlExpr;
- if (callType == CT_INDIRECT)
+ if (oper == GT_LSH_HI)
{
- assert(target == nullptr);
- target = call->gtCall.gtCallAddr;
- methHnd = nullptr;
+ inst_RV_SH(ins, EA_4BYTE, tree->gtRegNum, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, tree->gtRegNum, tree->gtRegNum, regLo, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
}
else
{
- methHnd = call->gtCallMethHnd;
+ assert(oper == GT_RSH_LO);
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, tree->gtRegNum, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, tree->gtRegNum, tree->gtRegNum, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
}
- CORINFO_SIG_INFO* sigInfo = nullptr;
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
+ genProduceReg(tree);
+}
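
// [Editor's sketch, not part of this diff] genCodeForShiftLong above combines
// both 32-bit halves of a long to produce one half of the shifted result. A
// minimal standalone C++ illustration of what the emitted shift + orr sequence
// computes, assuming a contained constant shift amount 0 < n < 32 (function
// names here are illustrative only):

#include <cstdint>

// GT_LSH_HI: new high word of (hi:lo) << n
uint32_t lshHi(uint32_t lo, uint32_t hi, unsigned n)
{
    return (hi << n) | (lo >> (32 - n)); // bits shifted out of lo enter hi
}

// GT_RSH_LO: new low word of (hi:lo) >> n (logical)
uint32_t rshLo(uint32_t lo, uint32_t hi, unsigned n)
{
    return (lo >> n) | (hi << (32 - n)); // bits shifted out of hi enter lo
}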
+
+//------------------------------------------------------------------------
+// genLeaInstruction: Produce code for a GT_LEA subnode.
+//
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ emitAttr size = emitTypeSize(lea);
+ genConsumeOperands(lea);
+
+ if (lea->Base() && lea->Index())
{
- sigInfo = call->callSig;
+ regNumber baseReg = lea->Base()->gtRegNum;
+ regNumber indexReg = lea->Index()->gtRegNum;
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
}
-#endif // DEBUG
-
- // If fast tail call, then we are done.
- if (call->IsFastTailCall())
+ else if (lea->Base())
{
- NYI_ARM("fast tail call");
+ regNumber baseReg = lea->Base()->gtRegNum;
+ getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, baseReg, lea->gtOffset);
}
-
- // For a pinvoke to unmanaged code we emit a label to clear
- // the GC pointer state before the callsite.
- // We can't utilize the typical lazy killing of GC pointers
- // at (or inside) the callsite.
- if (call->IsUnmanaged())
+ else if (lea->Index())
{
- genDefineTempLabel(genCreateTempLabel());
+ assert(!"Should we see a baseless address computation during CodeGen for ARM32?");
}
- // Determine return value size(s).
- ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
- emitAttr retSize = EA_PTRSIZE;
+ genProduceReg(lea);
+}
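
// [Editor's sketch, not part of this diff] The GT_LEA handling above simply
// materializes an addressing-mode computation into a register. Assuming
// gtScale acts as the index multiplier and gtOffset as the byte displacement,
// the value produced is (base-only and index-only forms just drop a term):

#include <cstdint>

uintptr_t leaAddress(uintptr_t base, uintptr_t index, unsigned scale, int offset)
{
    return base + index * scale + offset;
}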
- if (call->HasMultiRegRetVal())
- {
- NYI_ARM("has multi reg ret val");
- }
- else
- {
- assert(!varTypeIsStruct(call));
+//------------------------------------------------------------------------
+// genCompareLong: Generate code for comparing two longs when the result of the compare
+// is manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+//
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// If the high compare is false, we do not need to compare the low parts. For less than and
+// greater than, if the high compare is true, we can assume the entire compare is true.
+//
+void CodeGen::genCompareLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
- // We need to propagate the IL offset information to the call instruction, so we can emit
- // an IL to native mapping record for the call, to support managed return value debugging.
- // We don't want tail call helper calls that were converted from normal calls to get a record,
- // so we skip this hash table lookup logic in that case.
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
- if (target != nullptr)
- {
- // For ARM a call target can not be a contained indirection
- assert(!target->isContainedIndir());
+ regNumber targetReg = treeNode->gtRegNum;
- // We have already generated code for gtControlExpr evaluating it into a register.
- // We just need to emit "call reg" in this case.
- //
- assert(genIsValidIntReg(target->gtRegNum));
+ genConsumeOperands(tree);
- genEmitCall(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
- retSize, ilOffset, target->gtRegNum);
- }
- else
- {
- // Generate a direct call to a non-virtual user defined or helper method
- assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
- void* addr = nullptr;
- if (callType == CT_HELPER)
- {
- // Direct call to a helper method.
- CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
+ // Create compare for the high parts
+ instruction ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+ emitAttr cmpAttr = emitTypeSize(cmpType);
- void* pAddr = nullptr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
- if (addr == nullptr)
- {
- addr = pAddr;
- }
- }
- else
- {
- // Direct call to a non-virtual user function.
- CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
- if (call->IsSameThis())
- {
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
- }
+ // If the result is not being materialized in a register, we're done.
+ if (targetReg == REG_NA)
+ {
+ return;
+ }
- if ((call->NeedsNullCheck()) == 0)
- {
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
- }
+ BasicBlock* labelTrue = genCreateTempLabel();
+ BasicBlock* labelFalse = genCreateTempLabel();
+ BasicBlock* labelNext = genCreateTempLabel();
- CORINFO_CONST_LOOKUP addrInfo;
- compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+ genJccLongHi(tree->gtOper, labelTrue, labelFalse, tree->IsUnsigned());
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+ genJccLongLo(tree->gtOper, labelTrue, labelFalse);
- addr = addrInfo.addr;
- }
+ genDefineTempLabel(labelFalse);
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), tree->gtRegNum, 0);
+ getEmitter()->emitIns_J(INS_b, labelNext);
- assert(addr);
- // Non-virtual direct call to known addresses
- if (!arm_Valid_Imm_For_BL((ssize_t)addr))
- {
- regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
- genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
- }
- else
- {
- genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset);
- }
- }
+ genDefineTempLabel(labelTrue);
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), tree->gtRegNum, 1);
+
+ genDefineTempLabel(labelNext);
- // if it was a pinvoke we may have needed to get the address of a label
- if (genPendingCallLabel)
+ genProduceReg(tree);
+}
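
// [Editor's sketch, not part of this diff] genCompareLong and the
// genJccLongHi/genJccLongLo helpers below implement the usual two-word
// comparison: the high words decide unless they are equal, in which case the
// low words are compared as unsigned. A standalone C++ equivalent for the
// signed less-than case:

#include <cstdint>

bool longLessThan(int64_t a, int64_t b)
{
    int32_t  aHi = (int32_t)(a >> 32);
    int32_t  bHi = (int32_t)(b >> 32);
    uint32_t aLo = (uint32_t)a;
    uint32_t bLo = (uint32_t)b;

    if (aHi != bHi)
    {
        return aHi < bHi; // signed compare of the high words decides
    }
    return aLo < bLo;     // high words equal: unsigned compare of the low words
}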
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
{
- assert(call->IsUnmanaged());
- genDefineTempLabel(genPendingCallLabel);
- genPendingCallLabel = nullptr;
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
}
- // Update GC info:
- // All Callee arg registers are trashed and no longer contain any GC pointers.
- // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
- // registers from RBM_CALLEE_TRASH
- assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
- gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
-
- var_types returnType = call->TypeGet();
- if (returnType != TYP_VOID)
+ switch (cmp)
{
- regNumber returnReg;
+ case GT_EQ:
+ inst_JMP(EJ_ne, jumpFalse);
+ break;
- if (call->HasMultiRegRetVal())
- {
- assert(pRetTypeDesc != nullptr);
- unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
- // If regs allocated to call node are different from ABI return
- // regs in which the call has returned its result, move the result
- // to regs allocated to call node.
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types regType = pRetTypeDesc->GetReturnRegType(i);
- returnReg = pRetTypeDesc->GetABIReturnReg(i);
- regNumber allocatedReg = call->GetRegNumByIdx(i);
- if (returnReg != allocatedReg)
- {
- inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
- }
- }
- }
- else
- {
- if (varTypeIsFloating(returnType))
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
{
- returnReg = REG_FLOATRET;
+ inst_JMP(EJ_hi, jumpFalse);
+ inst_JMP(EJ_lo, jumpTrue);
}
else
{
- returnReg = REG_INTRET;
+ inst_JMP(EJ_gt, jumpFalse);
+ inst_JMP(EJ_lt, jumpTrue);
}
+ break;
- if (call->gtRegNum != returnReg)
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
{
- inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ inst_JMP(EJ_lo, jumpFalse);
+ inst_JMP(EJ_hi, jumpTrue);
}
- }
-
- genProduceReg(call);
- }
+ else
+ {
+ inst_JMP(EJ_lt, jumpFalse);
+ inst_JMP(EJ_gt, jumpTrue);
+ }
+ break;
- // If there is nothing next, that means the result is thrown away, so this value is not live.
- // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
- if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
- {
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ default:
+ noway_assert(!"expected a comparison operator");
}
}
-//------------------------------------------------------------------------
-// genLeaInstruction: Produce code for a GT_LEA subnode.
-//
-void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
{
- if (lea->Base() && lea->Index())
+ switch (cmp)
{
- regNumber baseReg = genConsumeReg(lea->Base());
- regNumber indexReg = genConsumeReg(lea->Index());
- getEmitter()->emitIns_R_ARX(INS_lea, EA_BYREF, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
- }
- else if (lea->Base())
- {
- getEmitter()->emitIns_R_AR(INS_lea, EA_BYREF, lea->gtRegNum, genConsumeReg(lea->Base()), lea->gtOffset);
- }
+ case GT_EQ:
+ inst_JMP(EJ_eq, jumpTrue);
+ break;
- genProduceReg(lea);
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_lo, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_ls, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_hs, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_hi, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
}
//------------------------------------------------------------------------
@@ -1497,151 +1882,92 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
}
//------------------------------------------------------------------------
-// genIntToIntCast: Generate code for an integer cast
+// genLongToIntCast: Generate code for long to int casts.
//
// Arguments:
-// treeNode - The GT_CAST node
+// cast - The GT_CAST node
//
// Return Value:
// None.
//
// Assumptions:
-// The treeNode must have an assigned register.
-// For a signed convert from byte, the source must be in a byte-addressable register.
-// Neither the source nor target type can be a floating point type.
+// The cast node and its sources (via GT_LONG) must have been assigned registers.
+// The destination cannot be a floating point type or a small integer type.
//
-void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+void CodeGen::genLongToIntCast(GenTree* cast)
{
- assert(treeNode->OperGet() == GT_CAST);
-
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- emitter* emit = getEmitter();
+ assert(cast->OperGet() == GT_CAST);
- var_types dstType = treeNode->CastToType();
- var_types srcType = genActualType(castOp->TypeGet());
- emitAttr movSize = emitActualTypeSize(dstType);
- bool movRequired = false;
-
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
+ GenTree* src = cast->gtGetOp1();
+ noway_assert(src->OperGet() == GT_LONG);
- // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
- regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+ genConsumeRegs(src);
- assert(genIsValidIntReg(targetReg));
- assert(genIsValidIntReg(sourceReg));
+ var_types srcType = ((cast->gtFlags & GTF_UNSIGNED) != 0) ? TYP_ULONG : TYP_LONG;
+ var_types dstType = cast->CastToType();
+ regNumber loSrcReg = src->gtGetOp1()->gtRegNum;
+ regNumber hiSrcReg = src->gtGetOp2()->gtRegNum;
+ regNumber dstReg = cast->gtRegNum;
- instruction ins = INS_invalid;
+ assert((dstType == TYP_INT) || (dstType == TYP_UINT));
+ assert(genIsValidIntReg(loSrcReg));
+ assert(genIsValidIntReg(hiSrcReg));
+ assert(genIsValidIntReg(dstReg));
- genConsumeReg(castOp);
- Lowering::CastInfo castInfo;
-
- // Get information about the cast.
- Lowering::getCastDescription(treeNode, &castInfo);
-
- if (castInfo.requiresOverflowCheck)
+ if (cast->gtOverflow())
{
- NYI_ARM("CodeGen::genIntToIntCast for OverflowCheck");
- }
- else // Non-overflow checking cast.
- {
- if (genTypeSize(srcType) == genTypeSize(dstType))
+ //
+ // Generate an overflow check for [u]long to [u]int casts:
+ //
+ // long -> int - check if the upper 33 bits are all 0 or all 1
+ //
+ // ulong -> int - check if the upper 33 bits are all 0
+ //
+ // long -> uint - check if the upper 32 bits are all 0
+ // ulong -> uint - check if the upper 32 bits are all 0
+ //
+
+ if ((srcType == TYP_LONG) && (dstType == TYP_INT))
{
- ins = INS_mov;
+ BasicBlock* allOne = genCreateTempLabel();
+ BasicBlock* success = genCreateTempLabel();
+
+ inst_RV_RV(INS_tst, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ emitJumpKind JmpNegative = genJumpKindForOper(GT_LT, CK_LOGICAL);
+ inst_JMP(JmpNegative, allOne);
+ inst_RV_RV(INS_tst, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ emitJumpKind jmpNotEqualL = genJumpKindForOper(GT_NE, CK_LOGICAL);
+ genJumpToThrowHlpBlk(jmpNotEqualL, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, success);
+
+ genDefineTempLabel(allOne);
+ inst_RV_IV(INS_cmp, hiSrcReg, -1, EA_4BYTE);
+ emitJumpKind jmpNotEqualS = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqualS, SCK_OVERFLOW);
+
+ genDefineTempLabel(success);
}
else
{
- var_types extendType = TYP_UNKNOWN;
-
- // If we need to treat a signed type as unsigned
- if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
- {
- extendType = genUnsignedType(srcType);
- movSize = emitTypeSize(extendType);
- movRequired = true;
- }
- else
+ if ((srcType == TYP_ULONG) && (dstType == TYP_INT))
{
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- extendType = srcType;
- movSize = emitTypeSize(srcType);
- if (srcType == TYP_UINT)
- {
- movRequired = true;
- }
- }
- else // (genTypeSize(srcType) > genTypeSize(dstType))
- {
- extendType = dstType;
- movSize = emitTypeSize(dstType);
- }
+ inst_RV_RV(INS_tst, loSrcReg, loSrcReg, TYP_INT, EA_4BYTE);
+ emitJumpKind JmpNegative = genJumpKindForOper(GT_LT, CK_LOGICAL);
+ genJumpToThrowHlpBlk(JmpNegative, SCK_OVERFLOW);
}
- ins = ins_Move_Extend(extendType, castOp->InReg());
+ inst_RV_RV(INS_tst, hiSrcReg, hiSrcReg, TYP_INT, EA_4BYTE);
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_LOGICAL);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
}
}
- // We should never be generating a load from memory instruction here!
- assert(!emit->emitInsIsLoad(ins));
-
- if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ if (dstReg != loSrcReg)
{
- emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ inst_RV_RV(INS_mov, dstReg, loSrcReg, TYP_INT, EA_4BYTE);
}
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genFloatToFloatCast: Generate code for a cast between float and double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// The cast is between float and double.
-//
-void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
-{
- // float <--> double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained()); // Cannot be contained
- assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
- genConsumeOperands(treeNode->AsOp());
-
- // treeNode must be a reg
- assert(!treeNode->isContained());
-
- if (srcType != dstType)
- {
- instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
- : INS_vcvt_d2f; // convert Double to Float
-
- getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
- }
- else if (treeNode->gtRegNum != op1->gtRegNum)
- {
- getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
- }
-
- genProduceReg(treeNode);
+ genProduceReg(cast);
}
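
// [Editor's sketch, not part of this diff] The overflow check above for a
// signed long -> int cast amounts to testing that the high word is the sign
// extension of the low word ("the upper 33 bits are all 0 or all 1"):

#include <cstdint>

bool fitsInInt32(int64_t v)
{
    int32_t lo = (int32_t)v;
    int32_t hi = (int32_t)(v >> 32);
    return hi == (lo < 0 ? -1 : 0); // hi must replicate lo's sign bit
}
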
//------------------------------------------------------------------------
@@ -1802,36 +2128,6 @@ void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
}
//------------------------------------------------------------------------
-// genCreateAndStoreGCInfo: Create and record GC Info for the function.
-//
-void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-
-//------------------------------------------------------------------------
// genEmitHelperCall: Emit a call to a helper function.
//
void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
@@ -1900,6 +2196,58 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
regTracker.rsTrashRegsForGCInterruptability();
}
+//------------------------------------------------------------------------
+// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
+//
+// Arguments:
+// treeNode - A TYP_LONG lclVar node.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
+// Its operand must be a GT_LONG node.
+//
+void CodeGen::genStoreLongLclVar(GenTree* treeNode)
+{
+ emitter* emit = getEmitter();
+
+ GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
+ unsigned lclNum = lclNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ assert(varDsc->TypeGet() == TYP_LONG);
+ assert(!varDsc->lvPromoted);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ noway_assert(op1->OperGet() == GT_LONG || op1->OperGet() == GT_MUL_LONG);
+ genConsumeRegs(op1);
+
+ if (op1->OperGet() == GT_LONG)
+ {
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA),
+ "Store of long lclVar with contained immediate");
+
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+ }
+ else if (op1->OperGet() == GT_MUL_LONG)
+ {
+ assert((op1->gtFlags & GTF_MUL_64RSLT) != 0);
+
+ // Stack store
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_LO, lclNum, 0);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), REG_LNGRET_HI, lclNum,
+ genTypeSize(TYP_INT));
+ }
+}
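
// [Editor's sketch, not part of this diff] The two emitIns_S_R stores above
// lay the TYP_LONG local out as two 32-bit halves, low word at offset 0 and
// high word at offset 4 (ARM32 here is little-endian), conceptually:

#include <cstdint>
#include <cstring>

void storeLongToSlot(uint8_t* slot, uint32_t loVal, uint32_t hiVal)
{
    std::memcpy(slot + 0, &loVal, sizeof(loVal)); // low half of the long
    std::memcpy(slot + 4, &hiVal, sizeof(hiVal)); // high half of the long
}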
+
#endif // _TARGET_ARM_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 71c6dd1162..7f98221df8 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -1265,67 +1265,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
-void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
-{
- // Reg cannot be a FP reg
- assert(!genIsValidFloatReg(reg));
-
- // The only TYP_REF constant that can come this path is a managed 'null' since it is not
- // relocatable. Other ref type constants (e.g. string objects) go through a different
- // code path.
- noway_assert(type != TYP_REF || val == 0);
-
- instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
-}
-
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. On ARM64 we always use REG_TMP_0 and REG_TMP_1 as temp registers
- * and this works fine in the case of tail calls
- * Implementation Note: pushReg = true, in case of tail calls.
- */
-void CodeGen::genEmitGSCookieCheck(bool pushReg)
-{
- noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
-
- // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
- // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
- if (!pushReg && (compiler->info.compRetType == TYP_REF))
- gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
-
- regNumber regGSConst = REG_TMP_0;
- regNumber regGSValue = REG_TMP_1;
-
- if (compiler->gsGlobalSecurityCookieAddr == nullptr)
- {
- // load the GS cookie constant into a reg
- //
- genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
- }
- else
- {
- // Ngen case - GS cookie constant needs to be accessed through an indirection.
- instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
- getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
- }
- // Load this method's GS value from the stack frame
- getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
- // Compare with the GC cookie constant
- getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
-
- BasicBlock* gsCheckBlk = genCreateTempLabel();
- emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
- inst_JMP(jmpEqual, gsCheckBlk);
- genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
- genDefineTempLabel(gsCheckBlk);
-}
-
BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
// Generate a call to the finally, like this:
@@ -1532,14 +1471,14 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter *emit = getEmitter();
emitAttr size = emitTypeSize(treeNode);
- GenTree *op1 = treeNode->gtOp.gtOp1;
- GenTree *op2 = treeNode->gtOp.gtOp2;
+ GenTree *op1 = treeNode->gtOp1;
+ GenTree *op2 = treeNode->gtOp2;
// to get the high bits of the multiply, we are constrained to using the
// 1-op form: RDX:RAX = RAX * rm
// The 3-op form (Rx=Ry*Rz) does not support it.
- genConsumeOperands(treeNode->AsOp());
+ genConsumeOperands(treeNode);
GenTree* regOp = op1;
GenTree* rmOp = op2;
@@ -1883,8 +1822,8 @@ void CodeGen::genReturn(GenTreePtr treeNode)
if (movRequired)
{
- emitAttr movSize = EA_ATTR(genTypeSize(targetType));
- getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum);
+ emitAttr attr = emitTypeSize(targetType);
+ getEmitter()->emitIns_R_R(INS_mov, attr, retReg, op1->gtRegNum);
}
}
@@ -1923,8 +1862,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (compiler->verbose)
{
unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- printf("Generating: ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ compiler->gtDispLIRNode(treeNode, "Generating: ");
}
#endif // DEBUG
@@ -2721,7 +2659,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_CALL:
- genCallInstruction(treeNode);
+ genCallInstruction(treeNode->AsCall());
break;
case GT_JMP:
@@ -2905,77 +2843,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
-//----------------------------------------------------------------------------------
-// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
-//
-// Arguments:
-// treeNode - Gentree of GT_STORE_LCL_VAR
-//
-// Return Value:
-// None
-//
-// Assumption:
-// The child of store is a multi-reg call node.
-// genProduceReg() on treeNode is made by caller of this routine.
-//
-void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
-
- // Structs of size >=9 and <=16 are returned in two return registers on ARM64 and HFAs.
- assert(varTypeIsStruct(treeNode));
-
- // Assumption: current ARM64 implementation requires that a multi-reg struct
- // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
- // being struct promoted.
- unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
- LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
- noway_assert(varDsc->lvIsMultiRegRet);
-
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
- GenTreeCall* call = actualOp1->AsCall();
- assert(call->HasMultiRegRetVal());
-
- genConsumeRegs(op1);
-
- ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
- unsigned regCount = pRetTypeDesc->GetReturnRegCount();
-
- if (treeNode->gtRegNum != REG_NA)
- {
- // Right now the only enregistrable structs supported are SIMD types.
- assert(varTypeIsSIMD(treeNode));
- NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");
- }
- else
- {
- // Stack store
- int offset = 0;
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types type = pRetTypeDesc->GetReturnRegType(i);
- regNumber reg = call->GetRegNumByIdx(i);
- if (op1->IsCopyOrReload())
- {
- // GT_COPY/GT_RELOAD will have valid reg for those positions
- // that need to be copied or reloaded.
- regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
- if (reloadReg != REG_NA)
- {
- reg = reloadReg;
- }
- }
-
- assert(reg != REG_NA);
- getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
- offset += genTypeSize(type);
- }
-
- varDsc->lvRegNum = REG_STK;
- }
-}
-
/***********************************************************************************************
* Generate code for localloc
*/
@@ -3331,7 +3198,6 @@ BAILOUT:
// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
{
-#if 0
// Make sure we got the arguments of the initblk/initobj operation in the right registers
unsigned size = initBlkNode->Size();
GenTreePtr dstAddr = initBlkNode->Addr();
@@ -3341,57 +3207,57 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
initVal = initVal->gtGetOp1();
}
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg() && !initVal->IsIntegralConst(0) || initVal->IsIntegralConst(0));
assert(size != 0);
assert(size <= INITBLK_UNROLL_LIMIT);
- assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
- emitter *emit = getEmitter();
+ emitter* emit = getEmitter();
genConsumeOperands(initBlkNode);
- // If the initVal was moved, or spilled and reloaded to a different register,
- // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
- // which needs to be the new register.
- regNumber valReg = initVal->gtRegNum;
- initVal = initVal->gtSkipReloadOrCopy();
-#else // !0
- NYI("genCodeForInitBlkUnroll");
-#endif // !0
-}
+ regNumber valReg = initVal->IsIntegralConst(0) ? REG_ZR : initVal->gtRegNum;
-// Generates code for InitBlk by calling the VM memset helper function.
-// Preconditions:
-// a) The size argument of the InitBlk is not an integer constant.
-// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
-void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
-{
- // Make sure we got the arguments of the initblk operation in the right registers
- unsigned size = initBlkNode->Size();
- GenTreePtr dstAddr = initBlkNode->Addr();
- GenTreePtr initVal = initBlkNode->Data();
- if (initVal->OperIsInitVal())
- {
- initVal = initVal->gtGetOp1();
- }
+ assert(!initVal->IsIntegralConst(0) || (valReg == REG_ZR));
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
- assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+ unsigned offset = 0;
-// TODO-ARM64-CQ: When initblk loop unrolling is implemented
-// put this assert back on.
-#if 0
- if (size != 0)
+ // Perform an unroll using stp.
+ if (size >= 2 * REGSIZE_BYTES)
{
- assert(blockSize >= INITBLK_UNROLL_LIMIT);
- }
-#endif // 0
+ // Determine how many 16 byte slots
+ size_t slots = size / (2 * REGSIZE_BYTES);
- genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+ while (slots-- > 0)
+ {
+ emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, valReg, valReg, dstAddr->gtRegNum, offset);
+ offset += (2 * REGSIZE_BYTES);
+ }
+ }
- genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+ // Fill the remainder (15 bytes or less) if there's any.
+ if ((size & 0xf) != 0)
+ {
+ if ((size & 8) != 0)
+ {
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 8;
+ }
+ if ((size & 4) != 0)
+ {
+ emit->emitIns_R_R_I(INS_str, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ emit->emitIns_R_R_I(INS_strh, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ emit->emitIns_R_R_I(INS_strb, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
+ }
+ }
}
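
// [Editor's sketch, not part of this diff] For the zero-init case above
// (valReg == REG_ZR), a constant size decomposes into 16-byte stp stores plus
// an 8/4/2/1-byte tail. A hypothetical helper (name is illustrative only) that
// prints the schedule the unroll emits; e.g. size = 29 gives stp@0, str@16,
// str@24, strb@28:

#include <cstdio>

void printInitBlkZeroSchedule(unsigned size)
{
    unsigned offset = 0;
    for (unsigned slots = size / 16; slots != 0; slots--, offset += 16)
    {
        printf("stp  xzr, xzr, [dst, #%u]\n", offset); // 16 bytes per pair
    }
    if (size & 8) { printf("str  xzr, [dst, #%u]\n", offset); offset += 8; }
    if (size & 4) { printf("str  wzr, [dst, #%u]\n", offset); offset += 4; }
    if (size & 2) { printf("strh wzr, [dst, #%u]\n", offset); offset += 2; }
    if (size & 1) { printf("strb wzr, [dst, #%u]\n", offset); }
}
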
// Generate code for a load from some address + offset
@@ -3413,13 +3279,34 @@ void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst
}
}
+// Generate code for a load pair from some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base from which to load
+void CodeGen::genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+
+ // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
+ emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst, base->gtLclVarCommon.gtLclNum, offset);
+ emit->emitIns_R_S(INS_ldr, EA_8BYTE, dst2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES);
+ }
+ else
+ {
+ emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, dst, dst2, base->gtRegNum, offset);
+ }
+}
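
// [Editor's sketch, not part of this diff] A load pair is just two adjacent
// 8-byte loads performed by one instruction; the helper above falls back to
// two ldr forms for stack locals because, per the TODO, emitIns_R_S has no
// ldp support yet. Conceptually (names illustrative):

#include <cstdint>
#include <cstring>

void loadPair(const uint8_t* addr, uint64_t* dst1, uint64_t* dst2)
{
    std::memcpy(dst1, addr, 8);     // first register  <- [addr]
    std::memcpy(dst2, addr + 8, 8); // second register <- [addr + 8]
}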
+
// Generate code for a store to some address + offset
// base: tree node which can be either a local address or arbitrary node
// offset: distance from the base at which to store
void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
{
-#if 0
- emitter *emit = getEmitter();
+ emitter* emit = getEmitter();
if (base->OperIsLocalAddr())
{
@@ -3429,11 +3316,30 @@ void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber sr
}
else
{
- emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+ emit->emitIns_R_R_I(ins, size, src, base->gtRegNum, offset);
+ }
+}
+
+// Generate code for a store pair to some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base at which to store
+void CodeGen::genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+
+ // TODO-ARM64-CQ: Implement support for using a stp instruction with a varNum (see emitIns_S_R)
+ emit->emitIns_S_R(INS_str, EA_8BYTE, src, base->gtLclVarCommon.gtLclNum, offset);
+ emit->emitIns_S_R(INS_str, EA_8BYTE, src2, base->gtLclVarCommon.gtLclNum, offset + REGSIZE_BYTES);
+ }
+ else
+ {
+ emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, src, src2, base->gtRegNum, offset);
}
-#else // !0
- NYI("genCodeForStoreOffset");
-#endif // !0
}
// Generates CpBlk code by performing a loop unroll
@@ -3442,80 +3348,96 @@ void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber sr
// This may seem small but covers >95% of the cases in several framework assemblies.
void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
{
-#if 0
// Make sure we got the arguments of the cpblk operation in the right registers
unsigned size = cpBlkNode->Size();
GenTreePtr dstAddr = cpBlkNode->Addr();
GenTreePtr source = cpBlkNode->Data();
- noway_assert(source->gtOper == GT_IND);
- GenTreePtr srcAddr = source->gtGetOp1();
+ GenTreePtr srcAddr = nullptr;
- assert((size != 0 ) && (size <= CPBLK_UNROLL_LIMIT));
+ assert((size != 0) && (size <= CPBLK_UNROLL_LIMIT));
- emitter *emit = getEmitter();
+ emitter* emit = getEmitter();
- if (!srcAddr->isContained())
- genConsumeReg(srcAddr);
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ if (srcAddr->isUsedFromReg())
+ {
+ genConsumeReg(srcAddr);
+ }
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
- if (!dstAddr->isContained())
+ if (dstAddr->isUsedFromReg())
+ {
genConsumeReg(dstAddr);
+ }
unsigned offset = 0;
- // If the size of this struct is larger than 16 bytes
- // let's use SSE2 to be able to do 16 byte at a time
- // loads and stores.
- if (size >= XMM_REGSIZE_BYTES)
+ // Grab the integer temp register to emit the loads and stores.
+ regMaskTP tmpMask = genFindLowestBit(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+ regNumber tmpReg = genRegNumFromMask(tmpMask);
+
+ if (size >= 2 * REGSIZE_BYTES)
{
- assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
- assert(genCountBits(cpBlkNode->gtRsvdRegs) == 1);
- regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs);
- assert(genIsValidFloatReg(xmmReg));
- size_t slots = size / XMM_REGSIZE_BYTES;
+ regMaskTP tmp2Mask = cpBlkNode->gtRsvdRegs & RBM_ALLINT & ~tmpMask;
+ regNumber tmp2Reg = genRegNumFromMask(tmp2Mask);
+
+ size_t slots = size / (2 * REGSIZE_BYTES);
while (slots-- > 0)
{
// Load
- genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ genCodeForLoadPairOffset(tmpReg, tmp2Reg, srcAddr, offset);
// Store
- genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
- offset += XMM_REGSIZE_BYTES;
+ genCodeForStorePairOffset(tmpReg, tmp2Reg, dstAddr, offset);
+ offset += 2 * REGSIZE_BYTES;
}
}
// Fill the remainder (15 bytes or less) if there's one.
if ((size & 0xf) != 0)
{
- // Grab the integer temp register to emit the remaining loads and stores.
- regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
-
if ((size & 8) != 0)
{
- genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ genCodeForLoadOffset(INS_ldr, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_str, EA_8BYTE, tmpReg, dstAddr, offset);
offset += 8;
}
if ((size & 4) != 0)
{
- genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ genCodeForLoadOffset(INS_ldr, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_str, EA_4BYTE, tmpReg, dstAddr, offset);
offset += 4;
}
if ((size & 2) != 0)
{
- genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset);
offset += 2;
}
if ((size & 1) != 0)
{
- genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
- genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset);
}
}
-#else // !0
- NYI("genCodeForCpBlkUnroll");
-#endif // !0
}
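
// [Editor's sketch, not part of this diff] The unrolled copy above moves
// 16-byte chunks with ldp/stp pairs and finishes with an 8/4/2/1-byte tail,
// all at compile-time-known offsets. A standalone C++ equivalent:

#include <cstdint>
#include <cstring>

void cpBlkUnrolled(uint8_t* dst, const uint8_t* src, unsigned size)
{
    unsigned offset = 0;
    for (unsigned slots = size / 16; slots != 0; slots--, offset += 16)
    {
        std::memcpy(dst + offset, src + offset, 16); // one ldp + one stp
    }
    if (size & 8) { std::memcpy(dst + offset, src + offset, 8); offset += 8; } // ldr/str (x)
    if (size & 4) { std::memcpy(dst + offset, src + offset, 4); offset += 4; } // ldr/str (w)
    if (size & 2) { std::memcpy(dst + offset, src + offset, 2); offset += 2; } // ldrh/strh
    if (size & 1) { std::memcpy(dst + offset, src + offset, 1); }              // ldrb/strb
}
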
// Generate code for CpObj nodes which copy structs that have interleaved
@@ -3587,22 +3509,28 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
unsigned slots = cpObjNode->gtSlots;
emitter* emit = getEmitter();
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+
// If we can prove it's on the stack we don't need to use the write barrier.
if (dstOnStack)
{
// TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
- while (slots > 0)
+ for (unsigned i = 0; i < slots; ++i)
{
- emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ emitAttr attr = EA_8BYTE;
+ if (gcPtrs[i] == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (gcPtrs[i] == GCT_BYREF)
+ attr = EA_BYREF;
+
+ emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
- emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
INS_OPTS_POST_INDEX);
- slots--;
}
}
else
{
- BYTE* gcPtrs = cpObjNode->gtGcPtrs;
unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
unsigned i = 0;
@@ -3619,8 +3547,9 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
break;
default:
- // We have a GC pointer, call the memory barrier.
+ // In the case of a GC-Pointer we'll call the ByRef write barrier helper
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+
gcPtrCount--;
break;
}
@@ -3635,30 +3564,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
}
-// Generate code for a CpBlk node by the means of the VM memcpy helper call
-// Preconditions:
-// a) The size argument of the CpBlk is not an integer constant
-// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
-void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
-{
- // Make sure we got the arguments of the cpblk operation in the right registers
- unsigned blockSize = cpBlkNode->Size();
- GenTreePtr dstAddr = cpBlkNode->Addr();
- assert(!dstAddr->isContained());
-
- genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
-
-#if 0
- // Enable this when we support cpblk loop unrolling.
- if (blockSize != 0)
- {
- assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
- }
-#endif // 0
-
- genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
-}
-
// generate code to do a switch statement based on a table of ip-relative offsets
void CodeGen::genTableBasedSwitch(GenTree* treeNode)
{
@@ -3779,239 +3684,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
#endif // !0
}
-// generate code for BoundsCheck nodes
-void CodeGen::genRangeCheck(GenTreePtr oper)
-{
-#ifdef FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
-#else // !FEATURE_SIMD
- noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
-#endif // !FEATURE_SIMD
-
- GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
-
- GenTreePtr arrLen = bndsChk->gtArrLen;
- GenTreePtr arrIndex = bndsChk->gtIndex;
- GenTreePtr arrRef = NULL;
- int lenOffset = 0;
-
- GenTree * src1, *src2;
- emitJumpKind jmpKind;
-
- genConsumeRegs(arrIndex);
- genConsumeRegs(arrLen);
-
- if (arrIndex->isContainedIntOrIImmed())
- {
- // To encode using a cmp immediate, we place the
- // constant operand in the second position
- src1 = arrLen;
- src2 = arrIndex;
- jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
- }
- else
- {
- src1 = arrIndex;
- src2 = arrLen;
- jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- }
-
- GenTreeIntConCommon* intConst = nullptr;
- if (src2->isContainedIntOrIImmed())
- {
- intConst = src2->AsIntConCommon();
- }
-
- if (intConst != nullptr)
- {
- getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue());
- }
- else
- {
- getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum);
- }
-
- genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
-// lower bound for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the lower bound offset will be returned.
-//
-// Return Value:
-// The offset.
-// TODO-Cleanup: move to CodeGenCommon.cpp
-
-// static
-unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
-}
-
-//------------------------------------------------------------------------
-// genOffsetOfMDArrayLength: Returns the offset from the Array object to the
-// size for the given dimension.
-//
-// Arguments:
-// elemType - the element type of the array
-// rank - the rank of the array
-// dimension - the dimension for which the lower bound offset will be returned.
-//
-// Return Value:
-// The offset.
-// TODO-Cleanup: move to CodeGenCommon.cpp
-
-// static
-unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
-{
- // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
- return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
-// producing the effective index by subtracting the lower bound.
-//
-// Arguments:
-// arrIndex - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-
-void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
-{
- emitter* emit = getEmitter();
- GenTreePtr arrObj = arrIndex->ArrObj();
- GenTreePtr indexNode = arrIndex->IndexExpr();
- regNumber arrReg = genConsumeReg(arrObj);
- regNumber indexReg = genConsumeReg(indexNode);
- regNumber tgtReg = arrIndex->gtRegNum;
- noway_assert(tgtReg != REG_NA);
-
- // We will use a temp register to load the lower bound and dimension size values
- //
- regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set
- tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask'
-
- regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
- regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
- noway_assert(tmpReg != REG_NA);
-
- assert(tgtReg != tmpReg);
-
- unsigned dim = arrIndex->gtCurrDim;
- unsigned rank = arrIndex->gtArrRank;
- var_types elemType = arrIndex->gtArrElemType;
- unsigned offset;
-
- offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
- emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
- emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
-
- offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
- emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
- emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
-
- emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
- genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
-
- genProduceReg(arrIndex);
-}
-
-//------------------------------------------------------------------------
-// genCodeForArrOffset: Generates code to compute the flattened array offset for
-// one dimension of an array reference:
-// result = (prevDimOffset * dimSize) + effectiveIndex
-// where dimSize is obtained from the arrObj operand
-//
-// Arguments:
-// arrOffset - the node for which we're generating code
-//
-// Return Value:
-// None.
-//
-// Notes:
-// dimSize and effectiveIndex are always non-negative, the former by design,
-// and the latter because it has been normalized to be zero-based.
-
-void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
-{
- GenTreePtr offsetNode = arrOffset->gtOffset;
- GenTreePtr indexNode = arrOffset->gtIndex;
- regNumber tgtReg = arrOffset->gtRegNum;
-
- noway_assert(tgtReg != REG_NA);
-
- if (!offsetNode->IsIntegralConst(0))
- {
- emitter* emit = getEmitter();
- regNumber offsetReg = genConsumeReg(offsetNode);
- noway_assert(offsetReg != REG_NA);
- regNumber indexReg = genConsumeReg(indexNode);
- noway_assert(indexReg != REG_NA);
- GenTreePtr arrObj = arrOffset->gtArrObj;
- regNumber arrReg = genConsumeReg(arrObj);
- noway_assert(arrReg != REG_NA);
- regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
- regNumber tmpReg = genRegNumFromMask(tmpRegMask);
- noway_assert(tmpReg != REG_NA);
- unsigned dim = arrOffset->gtCurrDim;
- unsigned rank = arrOffset->gtArrRank;
- var_types elemType = arrOffset->gtArrElemType;
- unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
-
- // Load tmpReg with the dimension size
- emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
-
- // Evaluate tgtReg = offsetReg*dim_size + indexReg.
- emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg);
- }
- else
- {
- regNumber indexReg = genConsumeReg(indexNode);
- if (indexReg != tgtReg)
- {
- inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
- }
- }
- genProduceReg(arrOffset);
-}
-
-// make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen
-//
-// TODO-Cleanup: move to CodeGenCommon.cpp
-GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
-{
- GenTreeIndir i(GT_IND, type, base, nullptr);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
-// make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate
-//
-// TODO-Cleanup: move to CodeGenCommon.cpp
-GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
-{
- GenTreeIntCon i(type, value);
- i.gtRegNum = REG_NA;
- // has to be nonnull (because contained nodes can't be the last in block)
- // but don't want it to be a valid pointer
- i.gtNext = (GenTree*)(-1);
- return i;
-}
-
instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
{
instruction ins = INS_brk;
@@ -4098,414 +3770,6 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
return ins;
}
-//------------------------------------------------------------------------
-// genCodeForShift: Generates the code sequence for a GenTree node that
-// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
-//
-// Arguments:
-// tree - the bit shift node (that specifies the type of bit shift to perform).
-//
-// Assumptions:
-// a) All GenTrees are register allocated.
-//
-void CodeGen::genCodeForShift(GenTreePtr tree)
-{
- var_types targetType = tree->TypeGet();
- genTreeOps oper = tree->OperGet();
- instruction ins = genGetInsForOper(oper, targetType);
- emitAttr size = emitTypeSize(tree);
-
- assert(tree->gtRegNum != REG_NA);
-
- GenTreePtr operand = tree->gtGetOp1();
- genConsumeOperands(tree->AsOp());
-
- GenTreePtr shiftBy = tree->gtGetOp2();
- if (!shiftBy->IsCnsIntOrI())
- {
- getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
- }
- else
- {
- unsigned immWidth = emitter::getBitWidth(size); // immWidth will be set to 32 or 64
- ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
-
- getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
- }
-
- genProduceReg(tree);
-}
-
-void CodeGen::genRegCopy(GenTree* treeNode)
-{
- assert(treeNode->OperGet() == GT_COPY);
-
- var_types targetType = treeNode->TypeGet();
- regNumber targetReg = treeNode->gtRegNum;
- assert(targetReg != REG_NA);
-
- GenTree* op1 = treeNode->gtOp.gtOp1;
-
- // Check whether this node and the node from which we're copying the value have the same
- // register type.
- // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
- // register, in which case it is passed as an argument, or returned from a call,
- // in an integer register and must be copied if it's in an xmm register.
-
- if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
- {
- inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
- }
-
- if (op1->IsLocal())
- {
- // The lclVar will never be a def.
- // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
- // appropriately set the gcInfo for the copied value.
- // If not, there are two cases we need to handle:
- // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
- // will remain live in its original register.
- // genProduceReg() will appropriately set the gcInfo for the copied value,
- // and genConsumeReg will reset it.
- // - Otherwise, we need to update register info for the lclVar.
-
- GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
- assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
-
- if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
- {
- LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
-
- // If we didn't just spill it (in genConsumeReg, above), then update the register info
- if (varDsc->lvRegNum != REG_STK)
- {
- // The old location is dying
- genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
-
- gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
-
- genUpdateVarReg(varDsc, treeNode);
-
- // The new location is going live
- genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
- }
- }
- }
- genProduceReg(treeNode);
-}
-
-// Produce code for a GT_CALL node
-void CodeGen::genCallInstruction(GenTreePtr node)
-{
- GenTreeCall* call = node->AsCall();
-
- assert(call->gtOper == GT_CALL);
-
- gtCallTypes callType = (gtCallTypes)call->gtCallType;
-
- IL_OFFSETX ilOffset = BAD_IL_OFFSET;
-
- // all virtuals should have been expanded into a control expression
- assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
-
- // Consume all the arg regs
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- continue;
-
- // Deal with multi register passed struct args.
- if (argNode->OperGet() == GT_FIELD_LIST)
- {
- GenTreeArgList* argListPtr = argNode->AsArgList();
- unsigned iterationNum = 0;
- regNumber argReg = curArgTabEntry->regNum;
- for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
- {
- GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
-
- genConsumeReg(putArgRegNode);
-
- if (putArgRegNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
- putArgRegNode->gtRegNum);
- }
-
- argReg = genRegArgNext(argReg);
- }
- }
- else
- {
- regNumber argReg = curArgTabEntry->regNum;
- genConsumeReg(argNode);
- if (argNode->gtRegNum != argReg)
- {
- inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
- }
- }
-
- // In the case of a varargs call,
- // the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the
- // integer and floating point registers, so let's do that.
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- NYI_ARM64("CodeGen - IsVarargs");
- }
- }
-
- // Insert a null check on "this" pointer if asked.
- if (call->NeedsNullCheck())
- {
- const regNumber regThis = genGetThisArgReg(call);
- getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
- }
-
- // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
- CORINFO_METHOD_HANDLE methHnd;
- GenTree* target = call->gtControlExpr;
- if (callType == CT_INDIRECT)
- {
- assert(target == nullptr);
- target = call->gtCall.gtCallAddr;
- methHnd = nullptr;
- }
- else
- {
- methHnd = call->gtCallMethHnd;
- }
-
- CORINFO_SIG_INFO* sigInfo = nullptr;
-#ifdef DEBUG
- // Pass the call signature information down into the emitter so the emitter can associate
- // native call sites with the signatures they were generated from.
- if (callType != CT_HELPER)
- {
- sigInfo = call->callSig;
- }
-#endif // DEBUG
-
- // If this is a fast tail call, then we are done. In this case we set up the args (both reg args
- // and stack args in the incoming arg area) and the call target in IP0. The epilog sequence will
- // generate "br IP0".
- if (call->IsFastTailCall())
- {
- // Don't support fast tail calling JIT helpers
- assert(callType != CT_HELPER);
-
- // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
- assert(target != nullptr);
-
- genConsumeReg(target);
-
- if (target->gtRegNum != REG_IP0)
- {
- inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
- }
- return;
- }
-
- // For a pinvoke to unmanaged code we emit a label to clear
- // the GC pointer state before the callsite.
- // We can't utilize the typical lazy killing of GC pointers
- // at (or inside) the callsite.
- if (call->IsUnmanaged())
- {
- genDefineTempLabel(genCreateTempLabel());
- }
-
- // Determine return value size(s).
- ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
- emitAttr retSize = EA_PTRSIZE;
- emitAttr secondRetSize = EA_UNKNOWN;
-
- if (call->HasMultiRegRetVal())
- {
- retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
- secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
- }
- else
- {
- assert(!varTypeIsStruct(call));
-
- if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
- {
- retSize = EA_GCREF;
- }
- else if (call->gtType == TYP_BYREF)
- {
- retSize = EA_BYREF;
- }
- }
-
- // We need to propagate the IL offset information to the call instruction, so we can emit
- // an IL to native mapping record for the call, to support managed return value debugging.
- // We don't want tail call helper calls that were converted from normal calls to get a record,
- // so we skip this hash table lookup logic in that case.
- if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
- {
- (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
- }
-
- if (target != nullptr)
- {
- // For Arm64 a call target cannot be a contained indirection
- assert(!target->isContainedIndir());
-
- // We have already generated code for gtControlExpr evaluating it into a register.
- // We just need to emit "call reg" in this case.
- //
- assert(genIsValidIntReg(target->gtRegNum));
-
- genEmitCall(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
- retSize, secondRetSize, ilOffset, genConsumeReg(target));
- }
- else
- {
- // Generate a direct call to a non-virtual user defined or helper method
- assert(callType == CT_HELPER || callType == CT_USER_FUNC);
-
- void* addr = nullptr;
- if (callType == CT_HELPER)
- {
- // Direct call to a helper method.
- CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
- noway_assert(helperNum != CORINFO_HELP_UNDEF);
-
- void* pAddr = nullptr;
- addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
-
- if (addr == nullptr)
- {
- addr = pAddr;
- }
- }
- else
- {
- // Direct call to a non-virtual user function.
- CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
- if (call->IsSameThis())
- {
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
- }
-
- if ((call->NeedsNullCheck()) == 0)
- {
- aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
- }
-
- CORINFO_CONST_LOOKUP addrInfo;
- compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
-
- addr = addrInfo.addr;
- }
-#if 0
- // Use this path if you want to load an absolute call target using
- // a sequence of movs followed by an indirect call (blr instruction)
-
- // Load the call target address in x16
- instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
-
- // indirect call to constant address in IP0
- genEmitCall(emitter::EC_INDIR_R,
- methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo)
- nullptr, //addr
- retSize,
- secondRetSize,
- ilOffset,
- REG_IP0);
-#else
- // Non-virtual direct call to known addresses
- genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, secondRetSize,
- ilOffset);
-#endif
- }
-
- // if it was a pinvoke we may have needed to get the address of a label
- if (genPendingCallLabel)
- {
- assert(call->IsUnmanaged());
- genDefineTempLabel(genPendingCallLabel);
- genPendingCallLabel = nullptr;
- }
-
- // Update GC info:
- // All Callee arg registers are trashed and no longer contain any GC pointers.
- // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
- // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
- // registers from RBM_CALLEE_TRASH
- assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
- gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
- gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
-
- var_types returnType = call->TypeGet();
- if (returnType != TYP_VOID)
- {
- regNumber returnReg;
-
- if (call->HasMultiRegRetVal())
- {
- assert(pRetTypeDesc != nullptr);
- unsigned regCount = pRetTypeDesc->GetReturnRegCount();
-
- // If regs allocated to call node are different from ABI return
- // regs in which the call has returned its result, move the result
- // to regs allocated to call node.
- for (unsigned i = 0; i < regCount; ++i)
- {
- var_types regType = pRetTypeDesc->GetReturnRegType(i);
- returnReg = pRetTypeDesc->GetABIReturnReg(i);
- regNumber allocatedReg = call->GetRegNumByIdx(i);
- if (returnReg != allocatedReg)
- {
- inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
- }
- }
- }
- else
- {
- if (varTypeIsFloating(returnType))
- {
- returnReg = REG_FLOATRET;
- }
- else
- {
- returnReg = REG_INTRET;
- }
-
- if (call->gtRegNum != returnReg)
- {
- inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
- }
- }
-
- genProduceReg(call);
- }
-
- // If there is nothing next, that means the result is thrown away, so this value is not live.
- // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
- if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
- {
- gcInfo.gcMarkRegSetNpt(RBM_INTRET);
- }
-}
-
// Produce code for a GT_JMP node.
// The arguments of the caller need to be transferred to the callee before exiting the caller.
// The actual jump to the callee is generated as part of the caller's epilog sequence.
@@ -4767,13 +4031,12 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
if (lsl > 0)
{
// Generate code to set tmpReg = base + index*scale
- emit->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
- INS_OPTS_LSL);
+ emit->emitIns_R_R_R_I(INS_add, size, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL);
}
else // no scale
{
// Generate code to set tmpReg = base + index
- emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ emit->emitIns_R_R_R(INS_add, size, tmpReg, memBase->gtRegNum, index->gtRegNum);
}
// Then compute target reg from [tmpReg + offset]
@@ -4786,7 +4049,7 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
// Then add the base register
// rd = rd + base
- emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+ emit->emitIns_R_R_R(INS_add, size, tmpReg, tmpReg, memBase->gtRegNum);
noway_assert(tmpReg != index->gtRegNum);
@@ -5035,237 +4298,6 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
}
//------------------------------------------------------------------------
-// genIntToIntCast: Generate code for an integer cast
-// This method handles integer overflow checking casts
-// as well as ordinary integer casts.
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// The treeNode is not a contained node and must have an assigned register.
-// For a signed convert from byte, the source must be in a byte-addressable register.
-// Neither the source nor target type can be a floating point type.
-//
-// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
-//
-void CodeGen::genIntToIntCast(GenTreePtr treeNode)
-{
- assert(treeNode->OperGet() == GT_CAST);
-
- GenTreePtr castOp = treeNode->gtCast.CastOp();
- emitter* emit = getEmitter();
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = genActualType(castOp->TypeGet());
- emitAttr movSize = emitActualTypeSize(dstType);
- bool movRequired = false;
-
- regNumber targetReg = treeNode->gtRegNum;
- regNumber sourceReg = castOp->gtRegNum;
-
- // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
- regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
-
- assert(genIsValidIntReg(targetReg));
- assert(genIsValidIntReg(sourceReg));
-
- instruction ins = INS_invalid;
-
- genConsumeReg(castOp);
- Lowering::CastInfo castInfo;
-
- // Get information about the cast.
- Lowering::getCastDescription(treeNode, &castInfo);
-
- if (castInfo.requiresOverflowCheck)
- {
-
- emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
-
- if (castInfo.signCheckOnly)
- {
- // We only need to check for a negative value in sourceReg
- emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
- emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
- noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
- // This is the only interesting case where we need to ensure the upper bits are zero.
- if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
- {
- // cast to TYP_ULONG:
- // We use a mov with size=EA_4BYTE
- // which will zero out the upper bits
- movSize = EA_4BYTE;
- movRequired = true;
- }
- }
- else if (castInfo.unsignedSource || castInfo.unsignedDest)
- {
- // When we are converting from/to unsigned,
- // we only have to check for any bits set in 'typeMask'
-
- noway_assert(castInfo.typeMask != 0);
- emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
- emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
- }
- else
- {
- // For a narrowing signed cast
- //
- // We must check the value is in a signed range.
-
- // Compare with the MAX
-
- noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
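-
- // As an illustrative (hypothetical) example: for a narrowing long -> sbyte cast, typeMax / typeMin
- // would be 127 / -128, and any source value outside that range takes the SCK_OVERFLOW throw path.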
-
- if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
- {
- emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
- }
- else
- {
- noway_assert(tmpReg != REG_NA);
- instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
- emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
- }
-
- emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
-
- // Compare with the MIN
-
- if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
- {
- emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
- }
- else
- {
- noway_assert(tmpReg != REG_NA);
- instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
- emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
- }
-
- emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
- genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
- }
- ins = INS_mov;
- }
- else // Non-overflow checking cast.
- {
- if (genTypeSize(srcType) == genTypeSize(dstType))
- {
- ins = INS_mov;
- }
- else
- {
- var_types extendType = TYP_UNKNOWN;
-
- // If we need to treat a signed type as unsigned
- if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
- {
- extendType = genUnsignedType(srcType);
- movSize = emitTypeSize(extendType);
- movRequired = true;
- }
- else
- {
- if (genTypeSize(srcType) < genTypeSize(dstType))
- {
- extendType = srcType;
- if (srcType == TYP_UINT)
- {
- // If we are casting from a smaller type to
- // a larger type, then we need to make sure the
- // higher 4 bytes are zero to guarantee the correct value.
- // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
- // will zero the upper bits
- movSize = EA_4BYTE;
- movRequired = true;
- }
- }
- else // (genTypeSize(srcType) > genTypeSize(dstType))
- {
- extendType = dstType;
- if (dstType == TYP_INT)
- {
- movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
- }
- }
- }
-
- ins = ins_Move_Extend(extendType, castOp->InReg());
- }
- }
-
- // We should never be generating a load from memory instruction here!
- assert(!emit->emitInsIsLoad(ins));
-
- if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
- {
- emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
-// genFloatToFloatCast: Generate code for a cast between float and double
-//
-// Arguments:
-// treeNode - The GT_CAST node
-//
-// Return Value:
-// None.
-//
-// Assumptions:
-// Cast is a non-overflow conversion.
-// The treeNode must have an assigned register.
-// The cast is between float and double or vice versa.
-//
-void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
-{
- // float <--> double conversions are always non-overflow ones
- assert(treeNode->OperGet() == GT_CAST);
- assert(!treeNode->gtOverflow());
-
- regNumber targetReg = treeNode->gtRegNum;
- assert(genIsValidFloatReg(targetReg));
-
- GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained()); // Cannot be contained
- assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
-
- var_types dstType = treeNode->CastToType();
- var_types srcType = op1->TypeGet();
- assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
-
- genConsumeOperands(treeNode->AsOp());
-
- // treeNode must be a reg
- assert(!treeNode->isContained());
-
- if (srcType != dstType)
- {
- insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
- : INS_OPTS_D_TO_S; // convert Double to Single
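-
- // i.e., on ARM64 this is expected to emit "fcvt dDst, sSrc" (single to double) or
- // "fcvt sDst, dSrc" (double to single).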
-
- getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
- }
- else if (treeNode->gtRegNum != op1->gtRegNum)
- {
- // Double-to-double or float-to-float cast: emit a move instruction.
- getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
- }
-
- genProduceReg(treeNode);
-}
-
-//------------------------------------------------------------------------
// genIntToFloatCast: Generate code to cast an int/long to float/double
//
// Arguments:
@@ -5543,546 +4575,6 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta()
return callerSPtoSPdelta;
}
-//---------------------------------------------------------------------
-// genIntrinsic - generate code for a given intrinsic
-//
-// Arguments
-// treeNode - the GT_INTRINSIC node
-//
-// Return value:
-// None
-//
-void CodeGen::genIntrinsic(GenTreePtr treeNode)
-{
- // Both operand and its result must be of the same floating point type.
- GenTreePtr srcNode = treeNode->gtOp.gtOp1;
- assert(varTypeIsFloating(srcNode));
- assert(srcNode->TypeGet() == treeNode->TypeGet());
-
- // Right now only Abs/Round/Sqrt are treated as math intrinsics.
- //
- switch (treeNode->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Abs:
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode);
- break;
-
- case CORINFO_INTRINSIC_Round:
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(INS_frintn, emitTypeSize(treeNode), treeNode, srcNode);
- break;
-
- case CORINFO_INTRINSIC_Sqrt:
- genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode);
- break;
-
- default:
- assert(!"genIntrinsic: Unsupported intrinsic");
- unreached();
- }
-
- genProduceReg(treeNode);
-}
-
-//---------------------------------------------------------------------
-// genPutArgStk - generate code for a GT_PUTARG_STK node
-//
-// Arguments
-// treeNode - the GT_PUTARG_STK node
-//
-// Return value:
-// None
-//
-void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
-{
- assert(treeNode->OperGet() == GT_PUTARG_STK);
- var_types targetType = treeNode->TypeGet();
- GenTreePtr source = treeNode->gtOp.gtOp1;
- emitter* emit = getEmitter();
-
- // This is the varNum for our store operations,
- // typically this is the varNum for the Outgoing arg space
- // When we are generating a tail call it will be the varNum for arg0
- unsigned varNumOut;
- unsigned argOffsetMax; // Records the maximum size of this area for assert checks
-
- // This is the varNum for our load operations,
- // only used when we have a multireg struct with a LclVar source
- unsigned varNumInp = BAD_VAR_NUM;
-
- // Get argument offset to use with 'varNumOut'
- // Here we cross check that argument offset hasn't changed from lowering to codegen since
- // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
- unsigned argOffsetOut = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
-
-#ifdef DEBUG
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
- assert(curArgTabEntry);
- assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
-#endif // DEBUG
-
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
- // Do we set up the stack arg in the incoming or the outgoing arg area?
- // Fast tail calls (implemented as epilog+jmp) set up their stack args in the incoming arg area.
- // All other calls set up their stack args in the outgoing arg area.
- if (putInIncomingArgArea)
- {
- varNumOut = getFirstArgWithStackSlot();
- argOffsetMax = compiler->compArgSize;
-#if FEATURE_FASTTAILCALL
- // This must be a fast tail call.
- assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
-
- // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
- // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
- // arg area needed for the tail call.
- LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
- assert(varDsc != nullptr);
-#endif // FEATURE_FASTTAILCALL
- }
- else
- {
- varNumOut = compiler->lvaOutgoingArgSpaceVar;
- argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
- }
- bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
-
- if (!isStruct) // a normal non-Struct argument
- {
- instruction storeIns = ins_Store(targetType);
- emitAttr storeAttr = emitTypeSize(targetType);
-
- // If it is contained then source must be the integer constant zero
- if (source->isContained())
- {
- assert(source->OperGet() == GT_CNS_INT);
- assert(source->AsIntConCommon()->IconValue() == 0);
- emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
- }
- else
- {
- genConsumeReg(source);
- emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
- }
- argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
- }
- else // We have some kind of a struct argument
- {
- assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64
-
- if (source->OperGet() == GT_FIELD_LIST)
- {
- // Deal with the multi register passed struct args.
- GenTreeFieldList* fieldListPtr = source->AsFieldList();
-
- // Evaluate each of the GT_FIELD_LIST items into their register
- // and store their register into the outgoing argument area
- for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1;
- genConsumeReg(nextArgNode);
-
- regNumber reg = nextArgNode->gtRegNum;
- var_types type = nextArgNode->TypeGet();
- emitAttr attr = emitTypeSize(type);
-
- // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
- // argument area
- emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(attr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
- }
- }
- else // We must have a GT_OBJ or a GT_LCL_VAR
- {
- noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
-
- var_types targetType = source->TypeGet();
- noway_assert(varTypeIsStruct(targetType));
-
- // We will copy this struct to the stack, possibly using a ldp instruction
- // Setup loReg and hiReg from the internal registers that we reserved in lower.
- //
- regNumber loReg = REG_NA;
- regNumber hiReg = REG_NA;
- regNumber addrReg = REG_NA;
-
- // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers
- genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg);
-
- GenTreeLclVarCommon* varNode = nullptr;
- GenTreePtr addrNode = nullptr;
-
- if (source->OperGet() == GT_LCL_VAR)
- {
- varNode = source->AsLclVarCommon();
- }
- else // we must have a GT_OBJ
- {
- assert(source->OperGet() == GT_OBJ);
-
- addrNode = source->gtOp.gtOp1;
-
- // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
- //
- if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
- {
- // We have a GT_OBJ(GT_LCL_VAR_ADDR)
- //
- // We will treat this case the same as above
- // (i.e if we just had this GT_LCL_VAR directly as the source)
- // so update 'source' to point this GT_LCL_VAR_ADDR node
- // and continue to the codegen for the LCL_VAR node below
- //
- varNode = addrNode->AsLclVarCommon();
- addrNode = nullptr;
- }
- }
-
- // Either varNode or addrNode must have been set up above;
- // the xor ensures that only one of the two is set up, not both
- assert((varNode != nullptr) ^ (addrNode != nullptr));
-
- BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
- BYTE* structGcLayout = &gcPtrs[0]; // The GC layout for the struct
- unsigned gcPtrCount; // The count of GC pointers in the struct
- int structSize;
- bool isHfa;
-
- // Set up structSize, isHfa, and gcPtrCount
- if (varNode != nullptr)
- {
- varNumInp = varNode->gtLclNum;
- assert(varNumInp < compiler->lvaCount);
- LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
-
- assert(varDsc->lvType == TYP_STRUCT);
- assert(varDsc->lvOnFrame); // This struct also must live in the stack frame
- assert(!varDsc->lvRegister); // And it can't live in a register (SIMD)
-
- structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
- // as that is how much stack is allocated for this LclVar
- isHfa = varDsc->lvIsHfa();
- gcPtrCount = varDsc->lvStructGcCount;
- structGcLayout = varDsc->lvGcLayout;
- }
- else // addrNode is used
- {
- assert(addrNode != nullptr);
-
- // Generate code to load the address that we need into a register
- genConsumeAddress(addrNode);
- addrReg = addrNode->gtRegNum;
-
- CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
-
- structSize = compiler->info.compCompHnd->getClassSize(objClass);
- isHfa = compiler->IsHfa(objClass);
- gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
- }
-
- bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct
-
- // If we have an HFA we can't have any GC pointers,
- // if not then the max size for the struct is 16 bytes
- if (isHfa)
- {
- noway_assert(gcPtrCount == 0);
- }
- else
- {
- noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
- }
-
- noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
-
- // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
- // ldr x2, [x0]
- // ldr x3, [x0, #8]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
- // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
- // ldp x2, x3, [x0]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
- // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
- // ldp x2, x3, [x0]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- // ldp x2, x3, [x0]
- // str x2, [sp, #32]
- // str x3, [sp, #40]
- //
- // Note that when loading from a varNode we currently can't use the ldp instruction
- // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
- //
-
- int remainingSize = structSize;
- unsigned structOffset = 0;
- unsigned nextIndex = 0;
-
- while (remainingSize >= 2 * TARGET_POINTER_SIZE)
- {
- var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
- var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
-
- if (hasGCpointers)
- {
- // We have GC pointers, so use two ldr instructions
- //
- // We must do it this way because we can't currently pass or track
- // two different emitAttr values for a ldp instruction.
-
- // Make sure that the first load instruction does not overwrite the addrReg.
- //
- if (loReg != addrReg)
- {
- if (varNode != nullptr)
- {
- // Load from our varNumInp source
- emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
- emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp,
- TARGET_POINTER_SIZE);
- }
- else
- {
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
- emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
- structOffset + TARGET_POINTER_SIZE);
- }
- }
- else // loReg == addrReg
- {
- assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null
- assert(hiReg != addrReg);
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
- structOffset + TARGET_POINTER_SIZE);
- emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
- }
- }
- else // our struct has no GC pointers
- {
- if (varNode != nullptr)
- {
- // Load from our varNumInp source, currently we can't use a ldp instruction to do this
- emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
- emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
- }
- else
- {
- // Use a ldp instruction
-
- // Load from our address expression source
- emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
- }
- }
-
- // Emit two store instructions to store the two registers into the outgoing argument area
- emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut);
- emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut,
- argOffsetOut + TARGET_POINTER_SIZE);
- argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
-
- remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct
- structOffset += (2 * TARGET_POINTER_SIZE);
- nextIndex += 2;
- }
-
- // For a 12-byte structSize we will generate two load instructions
- // ldr x2, [x0]
- // ldr w3, [x0, #8]
- // str x2, [sp, #16]
- // str w3, [sp, #24]
- //
- // When the first instruction has a loReg that is the same register as the addrReg,
- // we set deferLoad to true and issue the instructions in the reverse order
- // ldr x3, [x2, #8]
- // ldr x2, [x2]
- // str x2, [sp, #16]
- // str x3, [sp, #24]
- //
-
- var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
- emitAttr nextAttr = emitTypeSize(nextType);
- regNumber curReg = loReg;
-
- bool deferLoad = false;
- var_types deferType = TYP_UNKNOWN;
- emitAttr deferAttr = EA_PTRSIZE;
- int deferOffset = 0;
-
- while (remainingSize > 0)
- {
- if (remainingSize >= TARGET_POINTER_SIZE)
- {
- remainingSize -= TARGET_POINTER_SIZE;
-
- if ((curReg == addrReg) && (remainingSize != 0))
- {
- deferLoad = true;
- deferType = nextType;
- deferAttr = emitTypeSize(nextType);
- deferOffset = structOffset;
- }
- else // the typical case
- {
- if (varNode != nullptr)
- {
- // Load from our varNumInp source
- emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
- }
- else
- {
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
- }
- // Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
- }
- curReg = hiReg;
- structOffset += TARGET_POINTER_SIZE;
- nextIndex++;
- nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
- nextAttr = emitTypeSize(nextType);
- }
- else // (remainingSize < TARGET_POINTER_SIZE)
- {
- int loadSize = remainingSize;
- remainingSize = 0;
-
- // We should never have to do a non-pointer sized load when we have a LclVar source
- assert(varNode == nullptr);
-
- // the leftover size is smaller than a pointer and thus can never be a GC type
- assert(varTypeIsGC(nextType) == false);
-
- var_types loadType = TYP_UINT;
- if (loadSize == 1)
- {
- loadType = TYP_UBYTE;
- }
- else if (loadSize == 2)
- {
- loadType = TYP_USHORT;
- }
- else
- {
- // Need to handle additional loadSize cases here
- noway_assert(loadSize == 4);
- }
-
- instruction loadIns = ins_Load(loadType);
- emitAttr loadAttr = emitAttr(loadSize);
-
- // When deferLoad is false, curReg can be the same as addrReg
- // because the last instruction is allowed to overwrite addrReg.
- //
- noway_assert(!deferLoad || (curReg != addrReg));
-
- emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
-
- // Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
- }
- }
-
- if (deferLoad)
- {
- // We should never have to do a deferred load when we have a LclVar source
- assert(varNode == nullptr);
-
- curReg = addrReg;
-
- // Load from our address expression source
- emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
-
- // Emit a store instruction to store the register into the outgoing argument area
- emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
- argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
- assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
- }
- }
- }
-}
-
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
- unsigned prologSize,
- unsigned epilogSize DEBUGARG(void* codePtr))
-{
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-}
-
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
-{
- IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
- GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
- GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
- assert(gcInfoEncoder != nullptr);
-
- // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
- gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
-
- // First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
-
- // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
- gcInfoEncoder->FinalizeSlotIds();
-
- // Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
-
- if (compiler->opts.compDbgEnC)
- {
- // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
- // which is:
- // -return address
- // -saved off RBP
- // -saved 'this' pointer and bool for synchronized methods
-
- // 4 slots for RBP + return address + RSI + RDI
- int preservedAreaSize = 4 * REGSIZE_BYTES;
-
- if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
- {
- if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
- preservedAreaSize += REGSIZE_BYTES;
-
- preservedAreaSize += 1; // bool for synchronized methods
- }
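-
- // For example (hypothetically), a synchronized instance method with REGSIZE_BYTES == 8 would
- // report 4 * 8 + 8 + 1 = 41 bytes of preserved area here.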
-
- // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
- // frame
- gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
- }
-
- gcInfoEncoder->Build();
-
- // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
- // let's save the values anyway for debugging purposes
- compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
- compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
-}
-
/*****************************************************************************
* Emit a call to a helper function.
*
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
new file mode 100644
index 0000000000..af9fdfed9c
--- /dev/null
+++ b/src/jit/codegenarmarch.cpp
@@ -0,0 +1,1687 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ARM/ARM64 Code Generator Common Code XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
+
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "emit.h"
+
+//------------------------------------------------------------------------
+// genSetRegToIcon: Generate code that will set the given register to the integer constant.
+//
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come down this path is a managed 'null', since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics.
+ //
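+ // INS_ABS / INS_ROUND / INS_SQRT appear to be per-target instruction aliases; on ARM64 they
+ // presumably resolve to the fabs / frintn / fsqrt instructions used by the ARM64-specific
+ // code being replaced, while the ARM32 mappings are still incomplete (note the NYI_ARM for
+ // Round below).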
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_ABS, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ NYI_ARM("genIntrinsic for round - not implemented yet");
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_ROUND, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Sqrt:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_SQRT, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for a GT_PUTARG_STK node
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ var_types targetType = treeNode->TypeGet();
+ GenTreePtr source = treeNode->gtOp1;
+ emitter* emit = getEmitter();
+
+ // This is the varNum for our store operations,
+ // typically this is the varNum for the Outgoing arg space
+ // When we are generating a tail call it will be the varNum for arg0
+ unsigned varNumOut = (unsigned)-1;
+ unsigned argOffsetMax = (unsigned)-1; // Records the maximum size of this area for assert checks
+
+ // Get argument offset to use with 'varNumOut'
+ // Here we cross check that argument offset hasn't changed from lowering to codegen since
+ // we are storing arg slot number in GT_PUTARG_STK node in lowering phase.
+ unsigned argOffsetOut = treeNode->gtSlotNum * TARGET_POINTER_SIZE;
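+ // For example, a (hypothetical) arg in slot 2 yields argOffsetOut = 2 * TARGET_POINTER_SIZE,
+ // i.e. byte offset 16 on ARM64 and 8 on ARM32.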
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
+#endif // DEBUG
+
+ // Do we set up the stack arg in the incoming or the outgoing arg area?
+ // Fast tail calls (implemented as epilog+jmp) set up their stack args in the incoming arg area.
+ // All other calls set up their stack args in the outgoing arg area.
+ if (treeNode->putInIncomingArgArea())
+ {
+ NYI_ARM("genPutArgStk: fast tail call");
+
+#ifdef _TARGET_ARM64_
+ varNumOut = getFirstArgWithStackSlot();
+ argOffsetMax = compiler->compArgSize;
+#if FEATURE_FASTTAILCALL
+ // This must be a fast tail call.
+ assert(treeNode->gtCall->IsFastTailCall());
+
+ // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+ // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
+ // arg area needed for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
+ assert(varDsc != nullptr);
+#endif // FEATURE_FASTTAILCALL
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ varNumOut = compiler->lvaOutgoingArgSpaceVar;
+ argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+ }
+
+ bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
+
+ if (!isStruct) // a normal non-Struct argument
+ {
+ instruction storeIns = ins_Store(targetType);
+ emitAttr storeAttr = emitTypeSize(targetType);
+
+ // If it is contained then source must be the integer constant zero
+ if (source->isContained())
+ {
+ assert(source->OperGet() == GT_CNS_INT);
+ assert(source->AsIntConCommon()->IconValue() == 0);
+ NYI_ARM("genPutArgStk: contained zero source");
+
+#ifdef _TARGET_ARM64_
+ emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ genConsumeReg(source);
+ emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
+ }
+ argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ else // We have some kind of a struct argument
+ {
+ assert(source->isContained()); // We expect that this node was marked as contained in Lower
+
+ if (source->OperGet() == GT_FIELD_LIST)
+ {
+ // Deal with the multi register passed struct args.
+ GenTreeFieldList* fieldListPtr = source->AsFieldList();
+
+ // Evaluate each of the GT_FIELD_LIST items into their register
+ // and store their register into the outgoing argument area
+ for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
+ {
+ GenTreePtr nextArgNode = fieldListPtr->gtOp.gtOp1;
+ genConsumeReg(nextArgNode);
+
+ regNumber reg = nextArgNode->gtRegNum;
+ var_types type = nextArgNode->TypeGet();
+ emitAttr attr = emitTypeSize(type);
+
+ // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
+ // argument area
+ emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(attr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+ else // We must have a GT_OBJ or a GT_LCL_VAR
+ {
+ noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
+
+ NYI_ARM("genPutArgStk: GT_OBJ or GT_LCL_VAR source of struct type");
+
+#ifdef _TARGET_ARM64_
+
+ var_types targetType = source->TypeGet();
+ noway_assert(varTypeIsStruct(targetType));
+
+ // We will copy this struct to the stack, possibly using a ldp instruction
+ // Setup loReg and hiReg from the internal registers that we reserved in lower.
+ //
+ regNumber loReg = REG_NA;
+ regNumber hiReg = REG_NA;
+ regNumber addrReg = REG_NA;
+
+ // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers
+ genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg);
+
+ GenTreeLclVarCommon* varNode = nullptr;
+ GenTreePtr addrNode = nullptr;
+
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ varNode = source->AsLclVarCommon();
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(source->OperGet() == GT_OBJ);
+
+ addrNode = source->gtOp.gtOp1;
+
+ // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+ //
+ if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+ //
+ // We will treat this case the same as above
+ // (i.e if we just had this GT_LCL_VAR directly as the source)
+ // so update 'source' to point this GT_LCL_VAR_ADDR node
+ // and continue to the codegen for the LCL_VAR node below
+ //
+ varNode = addrNode->AsLclVarCommon();
+ addrNode = nullptr;
+ }
+ }
+
+ // Either varNode or addrNode must have been set up above;
+ // the xor ensures that only one of the two is set up, not both
+ assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+ BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
+ unsigned gcPtrCount; // The count of GC pointers in the struct
+ int structSize;
+ bool isHfa;
+
+ // This is the varNum for our load operations,
+ // only used when we have a multireg struct with a LclVar source
+ unsigned varNumInp = BAD_VAR_NUM;
+
+ // Set up structSize, isHfa, and gcPtrCount
+ if (varNode != nullptr)
+ {
+ varNumInp = varNode->gtLclNum;
+ assert(varNumInp < compiler->lvaCount);
+ LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
+
+ assert(varDsc->lvType == TYP_STRUCT);
+ assert(varDsc->lvOnFrame); // This struct also must live in the stack frame
+ assert(!varDsc->lvRegister); // And it can't live in a register (SIMD)
+
+ structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
+ // as that is how much stack is allocated for this LclVar
+ isHfa = varDsc->lvIsHfa();
+ gcPtrCount = varDsc->lvStructGcCount;
+ for (unsigned i = 0; i < gcPtrCount; ++i)
+ gcPtrs[i] = varDsc->lvGcLayout[i];
+ }
+ else // addrNode is used
+ {
+ assert(addrNode != nullptr);
+
+ // Generate code to load the address that we need into a register
+ genConsumeAddress(addrNode);
+ addrReg = addrNode->gtRegNum;
+
+ CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
+
+ structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ isHfa = compiler->IsHfa(objClass);
+ gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ }
+
+ bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct
+
+ // If we have an HFA we can't have any GC pointers,
+ // if not then the max size for the struct is 16 bytes
+ if (isHfa)
+ {
+ noway_assert(gcPtrCount == 0);
+ }
+ else
+ {
+ noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ }
+
+ noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
+
+ // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
+ // ldr x2, [x0]
+ // ldr x3, [x0, #8]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #32]
+ // str x3, [sp, #40]
+ //
+ // Note that when loading from a varNode we currently can't use the ldp instruction
+ // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
+ //
+
+ int remainingSize = structSize;
+ unsigned structOffset = 0;
+ unsigned nextIndex = 0;
+
+ while (remainingSize >= 2 * TARGET_POINTER_SIZE)
+ {
+ var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
+ var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
+
+ if (hasGCpointers)
+ {
+ // We have GC pointers, so use two ldr instructions
+ //
+ // We must do it this way because we can't currently pass or track
+ // two different emitAttr values for a ldp instruction.
+
+ // Make sure that the first load instruction does not overwrite the addrReg.
+ //
+ if (loReg != addrReg)
+ {
+ if (varNode != nullptr)
+ {
+ // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp,
+ TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ else // loReg == addrReg
+ {
+ assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null
+ assert(hiReg != addrReg);
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ }
+ }
+ else // our struct has no GC pointers
+ {
+ if (varNode != nullptr)
+ {
+ // Load from our varNumInp source, currently we can't use a ldp instruction to do this
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Use a ldp instruction
+
+ // Load from our address expression source
+ emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
+ }
+ }
+
+ // Emit two store instructions to store the two registers into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut);
+ emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut,
+ argOffsetOut + TARGET_POINTER_SIZE);
+ argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16-bytes of the struct
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+
+ remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16-bytes of the struct
+ structOffset += (2 * TARGET_POINTER_SIZE);
+ nextIndex += 2;
+ }
+
+ // For a 12-byte structSize we will generate two load instructions
+ // ldr x2, [x0]
+ // ldr w3, [x0, #8]
+ // str x2, [sp, #16]
+ // str w3, [sp, #24]
+ //
+ // When the first instruction has a loReg that is the same register as the addrReg,
+ // we set deferLoad to true and issue the instructions in the reverse order
+ // ldr x3, [x2, #8]
+ // ldr x2, [x2]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+
+ var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ emitAttr nextAttr = emitTypeSize(nextType);
+ regNumber curReg = loReg;
+
+ bool deferLoad = false;
+ var_types deferType = TYP_UNKNOWN;
+ emitAttr deferAttr = EA_PTRSIZE;
+ int deferOffset = 0;
+
+ while (remainingSize > 0)
+ {
+ if (remainingSize >= TARGET_POINTER_SIZE)
+ {
+ remainingSize -= TARGET_POINTER_SIZE;
+
+ if ((curReg == addrReg) && (remainingSize != 0))
+ {
+ deferLoad = true;
+ deferType = nextType;
+ deferAttr = emitTypeSize(nextType);
+ deferOffset = structOffset;
+ }
+ else // the typical case
+ {
+ if (varNode != nullptr)
+ {
+ // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
+ }
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ curReg = hiReg;
+ structOffset += TARGET_POINTER_SIZE;
+ nextIndex++;
+ nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ nextAttr = emitTypeSize(nextType);
+ }
+ else // (remainingSize < TARGET_POINTER_SIZE)
+ {
+ int loadSize = remainingSize;
+ remainingSize = 0;
+
+ // We should never have to do a non-pointer sized load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ // the leftover size is smaller than a pointer and thus can never be a GC type
+ assert(varTypeIsGC(nextType) == false);
+
+ var_types loadType = TYP_UINT;
+ if (loadSize == 1)
+ {
+ loadType = TYP_UBYTE;
+ }
+ else if (loadSize == 2)
+ {
+ loadType = TYP_USHORT;
+ }
+ else
+ {
+ // Need to handle additional loadSize cases here
+ noway_assert(loadSize == 4);
+ }
+
+ instruction loadIns = ins_Load(loadType);
+ emitAttr loadAttr = emitAttr(loadSize);
+
+ // When deferLoad is false, curReg can be the same as addrReg
+ // because the last instruction is allowed to overwrite addrReg.
+ //
+ noway_assert(!deferLoad || (curReg != addrReg));
+
+ emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+
+ if (deferLoad)
+ {
+ // We should never have to do a deferred load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ curReg = addrReg;
+
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+ assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+
+#endif // _TARGET_ARM64_
+ }
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+#if defined(_TARGET_ARM_)
+ // Longs are returned in two return registers on Arm32.
+ assert(varTypeIsLong(treeNode));
+#elif defined(_TARGET_ARM64_)
+ // On ARM64, structs of size >= 9 and <= 16 bytes, as well as HFAs, are returned in multiple return registers.
+ assert(varTypeIsStruct(treeNode));
+#endif // _TARGET_*
+
+ // Assumption: current implementation requires that a multi-reg
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable multi-reg return types supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");
+ }
+ else
+ {
+ // Stack store
+ int offset = 0;
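+
+ // As a rough sketch, for a (hypothetical) 16-byte struct returned in x0/x1 on ARM64 this
+ // loop emits two stores to the local's frame slot:
+ // str x0, [fp/sp, #lclOffs]
+ // str x1, [fp/sp, #lclOffs+8]
+ // using the GT_COPY/GT_RELOAD register instead wherever one is present.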
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = pRetTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+}
+
+//------------------------------------------------------------------------
+// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
+//
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree* src1;
+ GenTree* src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrIndex);
+ genConsumeRegs(arrLen);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // To encode using a cmp immediate, we place the
+ // constant operand in the second position
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ }
+ else
+ {
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ }
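+
+ // Either way the failure condition is an unsigned "index >= length"; swapping the operands
+ // when the index is a contained immediate simply flips the jump to GT_LE so that the
+ // immediate can be encoded directly in the cmp.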
+
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ emitter* emit = getEmitter();
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+ noway_assert(tgtReg != REG_NA);
+
+ // We will use a temp register to load the lower bound and dimension size values
+ //
+ regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set
+ tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask'
+
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+ noway_assert(tmpReg != REG_NA);
+
+ assert(tgtReg != tmpReg);
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+ unsigned offset;
+
+ offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
+
+ offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
+
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
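+//
+// For example, for a rank-2 access arr[i, j]: the dimension-0 node simply produces i (its
+// prevDimOffset is the constant 0), and the dimension-1 node then produces i * dimSize(1) + j,
+// the flattened element index.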
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ emitter* emit = getEmitter();
+ regNumber offsetReg = genConsumeReg(offsetNode);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber arrReg = genConsumeReg(arrOffset->gtArrObj);
+ noway_assert(offsetReg != REG_NA);
+ noway_assert(indexReg != REG_NA);
+ noway_assert(arrReg != REG_NA);
+
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+ unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+
+// Load tmpReg with the dimension size and evaluate
+// tgtReg = offsetReg*dim_size + indexReg.
+#if defined(_TARGET_ARM_)
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R_R(INS_MUL, EA_4BYTE, tgtReg, tmpReg, offsetReg);
+ emit->emitIns_R_R_R(INS_add, EA_4BYTE, tgtReg, tgtReg, indexReg);
+#elif defined(_TARGET_ARM64_)
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg);
+#endif // _TARGET_*
+ }
+ else
+ {
+ regNumber indexReg = genConsumeReg(indexNode);
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
+
+//------------------------------------------------------------------------
+// indirForm: Make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen.
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // It has to be non-null (because contained nodes can't be the last in block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+//------------------------------------------------------------------------
+// intForm: Make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate.
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // It has to be non-null (because contained nodes can't be the last in block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ var_types targetType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(tree);
+
+ assert(tree->gtRegNum != REG_NA);
+
+ genConsumeOperands(tree->AsOp());
+
+ GenTreePtr operand = tree->gtGetOp1();
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ unsigned immWidth = emitter::getBitWidth(size); // For ARM64, immWidth will be set to 32 or 64
+ ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
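+
+ // For example, for a 32-bit shift immWidth is 32, so a requested shift amount of 33 is
+ // masked down to 1 before being encoded into the instruction.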
+
+ getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ }
+
+ genProduceReg(tree);
+}
+
+// Generate code for a CpBlk node by means of the VM memcpy helper call.
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ assert(!dstAddr->isContained());
+
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+#ifdef _TARGET_ARM64_
+ if (blockSize != 0)
+ {
+ assert(blockSize > CPBLK_UNROLL_LIMIT);
+ }
+#endif // _TARGET_ARM64_
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+// a) The size argument of the InitBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ if (initBlkNode->gtOper == GT_STORE_DYN_BLK)
+ {
+ assert(initBlkNode->AsDynBlk()->gtDynamicSize->gtRegNum == REG_ARG_2);
+ }
+ else
+ {
+ assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+ }
+
+#ifdef _TARGET_ARM64_
+ if (size != 0)
+ {
+ assert(size > INITBLK_UNROLL_LIMIT);
+ }
+#endif // _TARGET_ARM64_
+
+ genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+//------------------------------------------------------------------------
+// genRegCopy: Generate a register copy.
+//
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ // Check whether this node and the node from which we're copying the value have the same
+ // register type.
+ // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+ // register, in which case it is passed as an argument, or returned from a call, in an
+ // integer register and must be copied if it is currently in a floating-point register.
+
+ if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+ {
+ NYI_ARM("genRegCopy floating point");
+#ifdef _TARGET_ARM64_
+ inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCallInstruction: Produce code for a GT_CALL node
+//
+void CodeGen::genCallInstruction(GenTreeCall* call)
+{
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+ // In the case of a varargs call, the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the integer and floating point
+ // registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM("CodeGen - IsVarargs");
+ NYI_ARM64("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+
+#if defined(_TARGET_ARM_)
+ regMaskTP tempMask = genFindLowestBit(call->gtRsvdRegs);
+ const regNumber tmpReg = genRegNumFromMask(tempMask);
+ if (genCountBits(call->gtRsvdRegs) > 1)
+ {
+ call->gtRsvdRegs &= ~tempMask;
+ }
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
+#elif defined(_TARGET_ARM64_)
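+ // Loading into the zero register discards the value but still faults if 'this' is null,
+ // so no temporary register is needed on ARM64.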
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
+#endif // _TARGET_*
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+ // If fast tail call, then we are done. In this case we setup the args (both reg args
+ // and stack args in incoming arg area) and call target. Epilog sequence would
+ // generate "br <reg>".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+
+ NYI_ARM("fast tail call");
+
+#ifdef _TARGET_ARM64_
+ // Use IP0 as the call target register.
+ if (target->gtRegNum != REG_IP0)
+ {
+ inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
+ }
+#endif // _TARGET_ARM64_
+
+ return;
+ }
+
+ // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
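+
+ // Note: retSize/secondRetSize tell the emitter whether each return register holds a GC ref or
+ // a byref, so the call site's GC info is recorded correctly.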
+
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+
+ if (target != nullptr)
+ {
+ // A call target can not be a contained indirection
+ assert(!target->isContainedIndir());
+
+ genConsumeReg(target);
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, target->gtRegNum);
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if ((call->NeedsNullCheck()) == 0)
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+
+ assert(addr != nullptr);
+
+// Non-virtual direct call to known addresses
+#ifdef _TARGET_ARM_
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ regNumber tmpReg = genRegNumFromMask(call->gtRsvdRegs);
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
+ genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+
+#if 0 && defined(_TARGET_ARM64_)
+ // Use this path if you want to load an absolute call target using
+ // a sequence of movs followed by an indirect call (blr instruction)
+
+ // Load the call target address in x16
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
+
+ // indirect call to constant address in IP0
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, //addr
+ retSize,
+ secondRetSize,
+ ilOffset,
+ REG_IP0);
+#endif
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(pRetTypeDesc != nullptr);
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = pRetTypeDesc->GetReturnRegType(i);
+ returnReg = pRetTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
+ returnReg = REG_PINVOKE_TCB;
+ }
+ else
+#endif // _TARGET_ARM_
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter* emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsLong(srcType))
+ {
+ genLongToIntCast(treeNode);
+ return;
+ }
+#endif // _TARGET_ARM_
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+ regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ genConsumeReg(castOp);
+ Lowering::CastInfo castInfo;
+
+ // Get information about the cast.
+ Lowering::getCastDescription(treeNode, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+ if (castInfo.signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
+ // This is the only interesting case where we need to ensure the upper bits are zero.
+ if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ // cast to TYP_ULONG:
+ // We use a mov with size=EA_4BYTE
+ // which will zero out the upper bits
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else if (castInfo.unsignedSource || castInfo.unsignedDest)
+ {
+ // When we are converting from/to unsigned,
+ // we only have to check for any bits set in 'typeMask'
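+ // (for example, when narrowing to an unsigned 16-bit type, typeMask is expected to cover all
+ // the bits above bit 15, so any value that doesn't fit in 16 bits fails the check)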
+
+ noway_assert(castInfo.typeMask != 0);
+ emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
+
+#if defined(_TARGET_ARM_)
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE))
+#elif defined(_TARGET_ARM64_)
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
+#endif // _TARGET_*
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
+
+// Compare with the MIN
+
+#if defined(_TARGET_ARM_)
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE))
+#elif defined(_TARGET_ARM64_)
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
+#endif // _TARGET_*
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ }
+ ins = INS_mov;
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType = TYP_UNKNOWN;
+
+ // If we need to treat a signed type as unsigned
+ if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ extendType = genUnsignedType(srcType);
+ movSize = emitTypeSize(extendType);
+ movRequired = true;
+ }
+ else
+ {
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+#ifdef _TARGET_ARM_
+ movSize = emitTypeSize(srcType);
+#endif // _TARGET_ARM_
+ if (srcType == TYP_UINT)
+ {
+#ifdef _TARGET_ARM64_
+ // If we are casting from a smaller type to
+ // a larger type, then we need to make sure the
+ // higher 4 bytes are zero to guarantee the correct value.
+ // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
+ // will zero the upper bits
+ movSize = EA_4BYTE;
+#endif // _TARGET_ARM64_
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+#if defined(_TARGET_ARM_)
+ movSize = emitTypeSize(dstType);
+#elif defined(_TARGET_ARM64_)
+ if (dstType == TYP_INT)
+ {
+ movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
+ }
+#endif // _TARGET_*
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ // We should never be generating a load from memory instruction here!
+ assert(!emit->emitInsIsLoad(ins));
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+#if defined(_TARGET_ARM_)
+
+ if (srcType != dstType)
+ {
+ instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
+ : INS_vcvt_d2f; // convert Double to Float
+
+ getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ if (srcType != dstType)
+ {
+ insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
+ : INS_OPTS_D_TO_S; // convert Double to Single
+
+ getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ // If double to double cast or float to float cast. Emit a move instruction.
+ getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+#endif // _TARGET_*
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder != nullptr);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // We keep the call count for the second call to gcMakeRegPtrTable() below.
+ unsigned callCnt = 0;
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
+
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
+
+#ifdef _TARGET_ARM64_
+
+ if (compiler->opts.compDbgEnC)
+ {
+ // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+ // which is:
+ // - return address
+ // - saved off RBP
+ // - saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ preservedAreaSize += REGSIZE_BYTES;
+
+ preservedAreaSize += 1; // bool for synchronized methods
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+
+#endif // _TARGET_ARM64_
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+#endif // _TARGET_ARMARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h
index 3a88c83915..eb4aeb7754 100644
--- a/src/jit/codegenclassic.h
+++ b/src/jit/codegenclassic.h
@@ -283,7 +283,7 @@ void genCodeForJumpTable(GenTreePtr tree);
void genCodeForSwitchTable(GenTreePtr tree);
void genCodeForSwitch(GenTreePtr tree);
-size_t genPushArgList(GenTreePtr call);
+size_t genPushArgList(GenTreeCall* call);
#ifdef _TARGET_ARM_
// We are generating code for a promoted struct local variable. Fill the next slot (register or
@@ -351,15 +351,15 @@ bool genFillSlotFromPromotedStruct(GenTreePtr arg,
// of cpBlk).
regMaskTP genFindDeadFieldRegs(GenTreePtr cpBlk);
-void SetupLateArgs(GenTreePtr call);
+void SetupLateArgs(GenTreeCall* call);
#ifdef _TARGET_ARM_
void PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask);
#endif // _TARGET_ARM_
-regMaskTP genLoadIndirectCallTarget(GenTreePtr call);
+regMaskTP genLoadIndirectCallTarget(GenTreeCall* call);
-regMaskTP genCodeForCall(GenTreePtr call, bool valUsed);
+regMaskTP genCodeForCall(GenTreeCall* call, bool valUsed);
GenTreePtr genGetAddrModeBase(GenTreePtr tree);
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index b1e474b755..89d6a4ca34 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -107,6 +107,11 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
m_stkArgVarNum = BAD_VAR_NUM;
#endif
+#if defined(UNIX_X86_ABI)
+ curNestedAlignment = 0;
+ maxNestedAlignment = 0;
+#endif
+
regTracker.rsTrackInit(compiler, &regSet);
gcInfo.regSet = &regSet;
m_cgEmitter = new (compiler->getAllocator()) emitter();
@@ -647,7 +652,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
#if defined(_TARGET_AMD64_)
return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
#elif defined(_TARGET_ARM64_)
- return RBM_CALLEE_TRASH_NOGC;
+ return RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF | RBM_CALLEE_TRASH_NOGC;
#elif defined(_TARGET_X86_)
return RBM_ESI | RBM_EDI | RBM_ECX;
#else
@@ -717,6 +722,8 @@ regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
#elif defined(_TARGET_X86_)
// This helper only trashes ECX.
return RBM_ECX;
+#elif defined(_TARGET_ARM64_)
+ return RBM_CALLEE_TRASH_NOGC & ~(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
#else
return RBM_CALLEE_TRASH_NOGC;
#endif // defined(_TARGET_AMD64_)
@@ -1095,9 +1102,9 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree)
/* Can't simultaneously become live and dead at the same time */
// (deadSet UNION bornSet) != EMPTY
- noway_assert(!VarSetOps::IsEmpty(this, VarSetOps::Union(this, deadSet, bornSet)));
+ noway_assert(!VarSetOps::IsEmptyUnion(this, deadSet, bornSet));
// (deadSet INTERSECTION bornSet) == EMPTY
- noway_assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, deadSet, bornSet)));
+ noway_assert(VarSetOps::IsEmptyIntersection(this, deadSet, bornSet));
#ifdef LEGACY_BACKEND
// In the LEGACY_BACKEND case, we only consider variables that are fully enregisterd
@@ -1406,9 +1413,8 @@ void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber re
#endif // LEGACY_BACKEND
// inline
-regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
+regNumber CodeGenInterface::genGetThisArgReg(GenTreeCall* call) const
{
- noway_assert(call->IsCall());
return REG_ARG_0;
}
@@ -1633,7 +1639,7 @@ void CodeGen::genDefineTempLabel(BasicBlock* label)
void CodeGen::genAdjustSP(ssize_t delta)
{
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) && !defined(UNIX_X86_ABI)
if (delta == sizeof(int))
inst_RV(INS_pop, REG_ECX, TYP_INT);
else
@@ -1663,14 +1669,14 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block)
{
noway_assert(block->bbFlags & BBF_JMP_TARGET);
- genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+ SetStackLevel(compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int));
if (genStackLevel != 0)
{
#ifdef _TARGET_X86_
getEmitter()->emitMarkStackLvl(genStackLevel);
inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
- genStackLevel = 0;
+ SetStackLevel(0);
#else // _TARGET_X86_
NYI("Need emitMarkStackLvl()");
#endif // _TARGET_X86_
@@ -1863,26 +1869,26 @@ bool CodeGen::genCreateAddrMode(GenTreePtr addr,
The following indirections are valid address modes on x86/x64:
[ icon] * not handled here
- [reg ] * not handled here
+ [reg ]
[reg + icon]
- [reg2 + reg1 ]
- [reg2 + reg1 + icon]
- [reg2 + 2 * reg1 ]
- [reg2 + 4 * reg1 ]
- [reg2 + 8 * reg1 ]
- [ 2 * reg1 + icon]
- [ 4 * reg1 + icon]
- [ 8 * reg1 + icon]
- [reg2 + 2 * reg1 + icon]
- [reg2 + 4 * reg1 + icon]
- [reg2 + 8 * reg1 + icon]
+ [reg1 + reg2 ]
+ [reg1 + reg2 + icon]
+ [reg1 + 2 * reg2 ]
+ [reg1 + 4 * reg2 ]
+ [reg1 + 8 * reg2 ]
+ [ 2 * reg2 + icon]
+ [ 4 * reg2 + icon]
+ [ 8 * reg2 + icon]
+ [reg1 + 2 * reg2 + icon]
+ [reg1 + 4 * reg2 + icon]
+ [reg1 + 8 * reg2 + icon]
The following indirections are valid address modes on arm64:
[reg]
[reg + icon]
- [reg2 + reg1]
- [reg2 + reg1 * natural-scale]
+ [reg1 + reg2]
+ [reg1 + reg2 * natural-scale]
*/
@@ -2442,6 +2448,11 @@ FOUND_AM:
noway_assert(FitsIn<INT32>(cns));
+ if (rv1 == nullptr && rv2 == nullptr)
+ {
+ return false;
+ }
+
/* Success - return the various components to the caller */
*revPtr = rev;
@@ -2604,6 +2615,51 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
return result;
}
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARMARCH_
+//------------------------------------------------------------------------
+// genEmitGSCookieCheck: Generate code to check that the GS cookie
+// wasn't trashed by a buffer overrun. Common code for ARM32 and ARM64.
+//
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ regNumber regGSConst = REG_TMP_0;
+ regNumber regGSValue = REG_TMP_1;
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // load the GS cookie constant into a reg
+ //
+ genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ }
+ else
+ {
+ // Ngen case - GS cookie constant needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
+ }
+ // Load this method's GS value from the stack frame
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+ // Compare with the GS cookie constant
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
+
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ // regGSConst and regGSValue aren't needed anymore, we can use them for helper call
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN, regGSConst);
+ genDefineTempLabel(gsCheckBlk);
+}
+#endif // _TARGET_ARMARCH_
+#endif // !LEGACY_BACKEND
+
/*****************************************************************************
*
* Generate an exit sequence for a return from a method (note: when compiling
@@ -2814,6 +2870,37 @@ void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
}
}
}
+
+#if defined(_TARGET_ARM_)
+void CodeGen::genInsertNopForUnwinder(BasicBlock* block)
+{
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+}
+#endif
+
#endif // FEATURE_EH_FUNCLETS
/*****************************************************************************
@@ -2946,7 +3033,8 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
if (compiler->fgHaveProfileData())
{
- printf("; with IBC profile data\n");
+ printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
+ compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
}
if (compiler->fgProfileData_ILSizeMismatch)
@@ -3120,14 +3208,11 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
-#ifdef _TARGET_AMD64_
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
trackedStackPtrsContig = false;
#elif defined(_TARGET_ARM_)
// On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
-#elif defined(_TARGET_ARM64_)
- // Incoming vararg registers are homed on the top of the stack. Tracked var may not be contiguous.
- trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->info.compIsVarArgs;
#else
trackedStackPtrsContig = !compiler->opts.compDbgEnC;
#endif
@@ -3171,7 +3256,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
(compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
#if defined(UNIX_X86_ABI)
- maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding
+ maxAllowedStackDepth += maxNestedAlignment;
#endif
noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
}
@@ -3896,12 +3981,12 @@ void CodeGen::genGCWriteBarrier(GenTreePtr tgt, GCInfo::WriteBarrierForm wbf)
}
#endif // DEBUG
#endif // 0
- genStackLevel += 4;
+ AddStackLevel(4);
inst_IV(INS_push, wbKind);
genEmitHelperCall(helper,
4, // argSize
EA_PTRSIZE); // retSize
- genStackLevel -= 4;
+ SubtractStackLevel(4);
}
else
{
@@ -7520,6 +7605,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
//
if (compiler->fgPtrArgCntMax < 1)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
compiler->fgPtrArgCntMax = 1;
}
#elif defined(_TARGET_ARM_)
@@ -7536,7 +7622,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
/* Restore the stack level */
- genStackLevel = saveStackLvl2;
+ SetStackLevel(saveStackLvl2);
#else // target
NYI("Emit Profiler Enter callback");
@@ -7679,6 +7765,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
//
if (compiler->fgPtrArgCntMax < 1)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
compiler->fgPtrArgCntMax = 1;
}
@@ -7765,7 +7852,7 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FC
#endif // target
/* Restore the stack level */
- genStackLevel = saveStackLvl2;
+ SetStackLevel(saveStackLvl2);
}
#endif // PROFILING_SUPPORTED
@@ -8054,6 +8141,14 @@ void CodeGen::genFinalizeFrame()
}
#endif // defined(_TARGET_ARMARCH_)
+#if defined(_TARGET_ARM_)
+ // If there are any reserved registers, add them to the modified register set.
+ if (regSet.rsMaskResvd != RBM_NONE)
+ {
+ regSet.rsSetRegsModified(regSet.rsMaskResvd);
+ }
+#endif // _TARGET_ARM_
+
#ifdef DEBUG
if (verbose)
{
@@ -9239,16 +9334,23 @@ void CodeGen::genFnEpilog(BasicBlock* block)
* the same descriptor with some minor adjustments.
*/
- getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addr,
+ // clang-format off
+ getEmitter()->emitIns_Call(callType,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(nullptr)
+ addr,
0, // argSize
EA_UNKNOWN, // retSize
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
BAD_IL_OFFSET, // IL offset
indCallReg, // ireg
REG_NA, // xreg
0, // xmul
0, // disp
true); // isJump
+ // clang-format on
}
else
{
@@ -9341,13 +9443,21 @@ void CodeGen::genFnEpilog(BasicBlock* block)
// Simply emit a jump to the methodHnd. This is similar to a call so we can use
// the same descriptor with some minor adjustments.
- getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+
+ // clang-format off
+ getEmitter()->emitIns_Call(callType,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(nullptr)
+ addrInfo.addr,
0, // argSize
EA_UNKNOWN, // retSize
EA_UNKNOWN, // secondRetSize
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
true); /* isJump */
+ // clang-format on
}
#if FEATURE_FASTTAILCALL
else
@@ -9419,6 +9529,20 @@ void CodeGen::genFnEpilog(BasicBlock* block)
genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
#endif // !FEATURE_STACK_FP_X87
+#ifdef JIT32_GCENCODER
+ // When using the JIT32 GC encoder, we do not start the OS-reported portion of the epilog until after
+ // the above call to `genRestoreCalleeSavedFltRegs` because that function
+ // a) does not actually restore any registers: there are none when targeting the Windows x86 ABI,
+ // which is the only target that uses the JIT32 GC encoder
+ // b) may issue a `vzeroupper` instruction to eliminate AVX -> SSE transition penalties.
+ // Because the `vzeroupper` instruction is not recognized by the VM's unwinder and there are no
+ // callee-save FP restores that the unwinder would need to see, we can avoid the need to change the
+ // unwinder (and break binary compat with older versions of the runtime) by starting the epilog
+ // after any `vzeroupper` instruction has been emitted. If either of the above conditions changes,
+ // we will need to rethink this.
+ getEmitter()->emitStartEpilog();
+#endif
+
/* Compute the size in bytes we've pushed/popped */
if (!doubleAlignOrFramePointerUsed())
@@ -9615,14 +9739,21 @@ void CodeGen::genFnEpilog(BasicBlock* block)
// Simply emit a jump to the methodHnd. This is similar to a call so we can use
// the same descriptor with some minor adjustments.
- getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+
+ // clang-format off
+ getEmitter()->emitIns_Call(callType,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(nullptr)
+ addrInfo.addr,
0, // argSize
EA_UNKNOWN // retSize
- FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), // secondRetSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN), // secondRetSize
gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA,
- 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
true); /* isJump */
+ // clang-format on
}
#if FEATURE_FASTTAILCALL
else
@@ -9644,17 +9775,25 @@ void CodeGen::genFnEpilog(BasicBlock* block)
unsigned stkArgSize = 0; // Zero on all platforms except x86
#if defined(_TARGET_X86_)
-
- noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
- stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
-
- noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
+ bool fCalleePop = true;
// varargs has caller pop
if (compiler->info.compIsVarArgs)
- stkArgSize = 0;
+ fCalleePop = false;
-#endif // defined(_TARGET_X86_)
+#ifdef UNIX_X86_ABI
+ if (IsCallerPop(compiler->info.compMethodInfo->args.callConv))
+ fCalleePop = false;
+#endif // UNIX_X86_ABI
+
+ if (fCalleePop)
+ {
+ noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
+ stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+
+ noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
+ }
+#endif // _TARGET_X86_
/* Return, popping our arguments (if any) */
instGen_Return(stkArgSize);
@@ -10271,6 +10410,22 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
/*****************************************************************************
*
* Generates code for an EH funclet prolog.
+ *
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch/filter-handler: eax = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: eax = the exception object that was caught (see GT_CATCH_ARG)
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch/filter-handler: eax = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: eax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * Funclet prolog/epilog sequence and funclet frame layout are TBD.
+ *
*/
void CodeGen::genFuncletProlog(BasicBlock* block)
@@ -10284,12 +10439,17 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
- compiler->unwindBegProlog();
+ gcInfo.gcResetForBB();
- // TODO Save callee-saved registers
+ compiler->unwindBegProlog();
// This is the end of the OS-reported prolog for purposes of unwinding
compiler->unwindEndProlog();
+
+ // TODO We may need EBP restore sequence here if we introduce PSPSym
+
+ // Add padding for 16-byte alignment
+ inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
}
/*****************************************************************************
@@ -10308,7 +10468,8 @@ void CodeGen::genFuncletEpilog()
ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
- // TODO Restore callee-saved registers
+ // Remove the padding that was added for 16-byte alignment
+ inst_RV_IV(INS_add, REG_SPBASE, 12, EA_PTRSIZE);
instGen_Return(0);
}
@@ -11061,7 +11222,7 @@ unsigned CodeGen::getFirstArgWithStackSlot()
//
void CodeGen::genSinglePush()
{
- genStackLevel += sizeof(void*);
+ AddStackLevel(REGSIZE_BYTES);
}
//------------------------------------------------------------------------
@@ -11069,7 +11230,7 @@ void CodeGen::genSinglePush()
//
void CodeGen::genSinglePop()
{
- genStackLevel -= sizeof(void*);
+ SubtractStackLevel(REGSIZE_BYTES);
}
//------------------------------------------------------------------------
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index 3950673e3a..08d854eaaf 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -191,7 +191,7 @@ public:
int genSPtoFPdelta();
int genTotalFrameSize();
- regNumber genGetThisArgReg(GenTreePtr call);
+ regNumber genGetThisArgReg(GenTreeCall* call) const;
#ifdef _TARGET_XARCH_
#ifdef _TARGET_AMD64_
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index 0530863d81..d65351115f 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -1897,10 +1897,22 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
{
// If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
// from its register, get it back in a register.
+ regMaskTP indRegMask = RBM_ALLINT;
+ regMaskTP arrRegMask = RBM_ALLINT;
+ if (!(index->gtFlags & GTF_SPILLED))
+ arrRegMask = ~genRegMask(index->gtRegNum);
if (arrRef != NULL)
- genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+ {
+ genRecoverReg(arrRef, arrRegMask, RegSet::KEEP_REG);
+ indRegMask &= ~genRegMask(arrRef->gtRegNum);
+ }
else if (!arrLen->IsCnsIntOrI())
- genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+ {
+ genRecoverReg(arrLen, arrRegMask, RegSet::KEEP_REG);
+ indRegMask &= ~genRegMask(arrLen->gtRegNum);
+ }
+ if (index->gtFlags & GTF_SPILLED)
+ regSet.rsUnspillReg(index, indRegMask, RegSet::KEEP_REG);
/* Make sure we have the values we expect */
noway_assert(index->gtFlags & GTF_REG_VAL);
@@ -5183,6 +5195,7 @@ void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
//
if (compiler->fgPtrArgCntMax < 1)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
compiler->fgPtrArgCntMax = 1;
}
@@ -5214,7 +5227,7 @@ void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
#endif //_TARGET_X86_
/* Restore the stack level */
- genStackLevel = saveStackLvl2;
+ SetStackLevel(saveStackLvl2);
}
#endif // PROFILING_SUPPORTED
@@ -10002,7 +10015,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP
// We have a return call() because we failed to tail call.
// In any case, just generate the call and be done.
assert(compiler->IsHfa(op1));
- genCodeForCall(op1, true);
+ genCodeForCall(op1->AsCall(), true);
genMarkTreeInReg(op1, REG_FLOATRET);
}
else
@@ -11226,7 +11239,7 @@ void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
assert(op2->gtOper == GT_CALL);
// Generate code for call and copy the return registers into the local.
- regMaskTP retMask = genCodeForCall(op2, true);
+ regMaskTP retMask = genCodeForCall(op2->AsCall(), true);
// Ret mask should be contiguously set from s0, up to s3 or starting from d0 upto d3.
CLANG_FORMAT_COMMENT_ANCHOR;
@@ -12076,7 +12089,7 @@ void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMas
switch (oper)
{
case GT_CALL:
- regs = genCodeForCall(tree, true);
+ regs = genCodeForCall(tree->AsCall(), true);
/* If the result is in a register, make sure it ends up in the right place */
@@ -12680,30 +12693,7 @@ void CodeGen::genCodeForBBlist()
#if FEATURE_EH_FUNCLETS
#if defined(_TARGET_ARM_)
- // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
- // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
- // calls the funclet during non-exceptional control flow.
- if (block->bbFlags & BBF_FINALLY_TARGET)
- {
- assert(block->bbFlags & BBF_JMP_TARGET);
-
-#ifdef DEBUG
- if (compiler->verbose)
- {
- printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
- }
-#endif
- // Create a label that we'll use for computing the start of an EH region, if this block is
- // at the beginning of such a region. If we used the existing bbEmitCookie as is for
- // determining the EH regions, then this NOP would end up outside of the region, if this
- // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
- // would be executed, which we would prefer not to do.
-
- block->bbUnwindNopEmitCookie =
- getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
-
- instGen(INS_nop);
- }
+ genInsertNopForUnwinder(block);
#endif // defined(_TARGET_ARM_)
genUpdateCurrentFunclet(block);
@@ -12755,7 +12745,7 @@ void CodeGen::genCodeForBBlist()
/* Both stacks are always empty on entry to a basic block */
- genStackLevel = 0;
+ SetStackLevel(0);
#if FEATURE_STACK_FP_X87
genResetFPstkLevel();
#endif // FEATURE_STACK_FP_X87
@@ -12854,7 +12844,7 @@ void CodeGen::genCodeForBBlist()
// Managed Retval under managed debugger - we need to make sure that the returned ref-type is
// reported as alive even though not used within the caller for managed debugger sake. So
// consider the return value of the method as used if generating debuggable code.
- genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ genCodeForCall(tree->AsCall(), compiler->opts.MinOpts() || compiler->opts.compDbgCode);
genUpdateLife(tree);
gcInfo.gcMarkRegSetNpt(RBM_INTRET);
break;
@@ -12961,7 +12951,7 @@ void CodeGen::genCodeForBBlist()
}
}
- genStackLevel -= savedStkLvl;
+ SubtractStackLevel(savedStkLvl);
gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
@@ -15055,7 +15045,7 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
{
regMaskTP retMask;
case GT_CALL:
- retMask = genCodeForCall(tree, true);
+ retMask = genCodeForCall(tree->AsCall(), true);
if (retMask == RBM_NONE)
regPair = REG_PAIR_NONE;
else
@@ -15676,9 +15666,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
-size_t CodeGen::genPushArgList(GenTreePtr call)
+size_t CodeGen::genPushArgList(GenTreeCall* call)
{
- GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
+ GenTreeArgList* regArgs = call->gtCallLateArgs;
size_t size = 0;
regMaskTP addrReg;
@@ -15686,14 +15676,14 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
// Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
// so we can iterate over this argument list more uniformly.
// Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCallArgs);
+ if (call->gtCallObjp == NULL)
{
- args = call->gtCall.gtCallArgs;
+ args = call->gtCallArgs;
}
else
{
- firstForObjp.Current() = call->gtCall.gtCallObjp;
+ firstForObjp.Current() = call->gtCallObjp;
args = &firstForObjp;
}
@@ -16358,7 +16348,7 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
}
inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
- genStackLevel += stkDisp;
+ AddStackLevel(stkDisp);
while (curDisp < stkDisp)
{
@@ -16508,10 +16498,9 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
// ARM and AMD64 use this method to pass the stack-based args
//
// returns size pushed (always zero)
-size_t CodeGen::genPushArgList(GenTreePtr call)
+size_t CodeGen::genPushArgList(GenTreeCall* call)
{
-
- GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
+ GenTreeArgList* lateArgs = call->gtCallLateArgs;
GenTreePtr curr;
var_types type;
int argSize;
@@ -16520,14 +16509,14 @@ size_t CodeGen::genPushArgList(GenTreePtr call)
// Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
// so we can iterate over this argument list more uniformly.
// Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
- GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
- if (call->gtCall.gtCallObjp == NULL)
+ GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCallArgs);
+ if (call->gtCallObjp == NULL)
{
- args = call->gtCall.gtCallArgs;
+ args = call->gtCallArgs;
}
else
{
- objpArgList.Current() = call->gtCall.gtCallObjp;
+ objpArgList.Current() = call->gtCallObjp;
args = &objpArgList;
}
@@ -17478,14 +17467,14 @@ regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
return res;
}
-void CodeGen::SetupLateArgs(GenTreePtr call)
+void CodeGen::SetupLateArgs(GenTreeCall* call)
{
GenTreeArgList* lateArgs;
GenTreePtr curr;
/* Generate the code to move the late arguments into registers */
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
{
curr = lateArgs->Current();
assert(curr);
@@ -18035,7 +18024,7 @@ void CodeGen::SetupLateArgs(GenTreePtr call)
/* If any of the previously loaded arguments were spilled - reload them */
- for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ for (lateArgs = call->gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
{
curr = lateArgs->Current();
assert(curr);
@@ -18139,9 +18128,9 @@ void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTa
#endif // FEATURE_FIXED_OUT_ARGS
-regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
+regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreeCall* call)
{
- assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
+ assert((gtCallTypes)call->gtCallType == CT_INDIRECT);
regMaskTP fptrRegs;
@@ -18192,7 +18181,7 @@ regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
}
/* Record the register(s) used for the indirect call func ptr */
- fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
+ fptrRegs = genMakeRvalueAddressable(call->gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
/* If any of the previously loaded arguments were spilled, reload them */
@@ -18215,7 +18204,7 @@ regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
/* Make sure the target is still addressable while avoiding the argument registers */
- fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
+ fptrRegs = genKeepAddressable(call->gtCallAddr, fptrRegs, argRegs);
return fptrRegs;
}
@@ -18231,7 +18220,7 @@ regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
-regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
+regMaskTP CodeGen::genCodeForCall(GenTreeCall* call, bool valUsed)
{
emitAttr retSize;
size_t argSize;
@@ -18263,7 +18252,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
}
#endif
- gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
IL_OFFSETX ilOffset = BAD_IL_OFFSET;
CORINFO_SIG_INFO* sigInfo = nullptr;
@@ -18275,13 +18264,11 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
/* Make some sanity checks on the call node */
- // This is a call
- noway_assert(call->IsCall());
// "this" only makes sense for user functions
- noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
+ noway_assert(call->gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
// tail calls aren't done for helpers or caller-pop args; also check that
// the global flag is set
- noway_assert(!call->gtCall.IsTailCall() ||
+ noway_assert(!call->IsTailCall() ||
(callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
#ifdef DEBUG
@@ -18289,7 +18276,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// native call sites with the signatures they were generated from.
if (callType != CT_HELPER)
{
- sigInfo = call->gtCall.callSig;
+ sigInfo = call->callSig;
}
#endif // DEBUG
@@ -18338,7 +18325,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
/* Pass the arguments */
- if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
+ if ((call->gtCallObjp != NULL) || (call->gtCallArgs != NULL))
{
argSize += genPushArgList(call);
}
@@ -18422,8 +18409,8 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
/* Do not spill the argument registers.
Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
- noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
- spillRegs &= ~call->gtCall.gtCallRegUsedMask;
+ noway_assert((regSet.rsMaskMult & call->gtCallRegUsedMask) == 0);
+ spillRegs &= ~call->gtCallRegUsedMask;
if (spillRegs)
{
@@ -18449,7 +18436,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
compCurFPState.Push(regReturn);
}
#else
- SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
+ SpillForCallRegisterFP(call->gtCallRegUsedMask);
#endif
/* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
@@ -18487,7 +18474,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
/* fire the event at the call site */
/* alas, right now I can only handle calls via a method handle */
- if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
+ if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->IsTailCall())
{
unsigned saveStackLvl2 = genStackLevel;
@@ -18499,7 +18486,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
#ifdef _TARGET_X86_
regMaskTP byrefPushedRegs;
regMaskTP norefPushedRegs;
- regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
+ regMaskTP pushedArgRegs = genPushRegs(call->gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
if (compiler->compProfilerMethHndIndirected)
{
@@ -18521,6 +18508,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
//
if (compiler->fgPtrArgCntMax < 1)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to 1\n", compiler->fgPtrArgCntMax);
compiler->fgPtrArgCntMax = 1;
}
@@ -18582,7 +18570,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
#endif //_TARGET_X86_
/* Restore the stack level */
- genStackLevel = saveStackLvl2;
+ SetStackLevel(saveStackLvl2);
}
#endif // PROFILING_SUPPORTED
@@ -18597,7 +18585,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// check the stacks as frequently as possible
&& !call->IsHelperCall()
#else
- && call->gtCall.gtCallType == CT_USER_FUNC
+ && call->gtCallType == CT_USER_FUNC
#endif
)
{
@@ -18617,18 +18605,18 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
bool fTailCallTargetIsVSD = false;
- bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+ bool fTailCall = (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
/* Check for Delegate.Invoke. If so, we inline it. We get the
target-object and target-function from the delegate-object, and do
an indirect call.
*/
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
+ if ((call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
{
- noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+ noway_assert(call->gtCallType == CT_USER_FUNC);
- assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
+ assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
(CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
(CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
@@ -18644,7 +18632,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
#ifdef _TARGET_ARM_
- if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
{
getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
pInfo->offsetOfSecureDelegateIndirectCell);
@@ -18725,13 +18713,13 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// No need to null check the this pointer - the dispatch code will deal with this.
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+ noway_assert(genStillAddressable(call->gtCallAddr));
// Now put the address in REG_VIRTUAL_STUB_PARAM.
// This is typically a nop when the register used for
// the gtCallAddr is REG_VIRTUAL_STUB_PARAM
//
- inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
+ inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCallAddr);
regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
#if defined(_TARGET_X86_)
@@ -18749,11 +18737,11 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
emitCallType = emitter::EC_INDIR_ARD;
indReg = REG_VIRTUAL_STUB_PARAM;
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
// Make the virtual stub call:
// ldr indReg, [REG_VIRTUAL_STUB_PARAM]
@@ -18764,7 +18752,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
//
indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
- assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
+ assert(call->gtCallAddr->gtFlags & GTF_REG_VAL);
getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
regTracker.rsTrackRegTrash(indReg);
@@ -18787,7 +18775,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// Get stub addr. This will return NULL if virtual call stubs are not active
void* stubAddr = NULL;
- stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
+ stubAddr = (void*)call->gtStubCallStubAddr;
noway_assert(stubAddr != NULL);
@@ -18803,7 +18791,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
int disp = 0;
regNumber callReg = REG_NA;
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
{
#if CPU_LOAD_STORE_ARCH
callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
@@ -18833,7 +18821,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (callTypeStubAddr != emitter::EC_INDIR_R)
#endif
{
- getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
+ getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCallMethHnd,
INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
@@ -18855,21 +18843,21 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (callType == CT_INDIRECT)
{
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+ noway_assert(genStillAddressable(call->gtCallAddr));
// Now put the address in EAX.
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
}
else
{
// importer/EE should guarantee the indirection
- noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+ noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
- ssize_t(call->gtCall.gtStubCallStubAddr));
+ ssize_t(call->gtStubCallStubAddr));
}
fTailCallTargetIsVSD = true;
@@ -18903,12 +18891,11 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
VPTR_OFFS);
regTracker.rsTrackRegTrash(vptrReg);
- noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
+ noway_assert(vptrMask & ~call->gtCallRegUsedMask);
/* Get hold of the vtable offset (note: this might be expensive) */
- compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
- &vtabOffsOfIndirection,
+ compiler->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
&vtabOffsAfterIndirection);
/* Get the appropriate vtable chunk */
@@ -18935,13 +18922,13 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
vtabOffsAfterIndirection);
- getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCallMethHnd,
INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, ilOffset,
vptrReg); // ireg
#else
- getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCallMethHnd,
INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
gcInfo.gcRegByrefSetCur, ilOffset,
@@ -18967,7 +18954,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// - Indirect calls to computed addresses
// - Tailcall versions of all of the above
- CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+ CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
//------------------------------------------------------
// Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
@@ -19015,10 +19002,10 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (callType == CT_INDIRECT)
{
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+ noway_assert(genStillAddressable(call->gtCallAddr));
- if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
- indCallReg = call->gtCall.gtCallAddr->gtRegNum;
+ if (call->gtCallAddr->gtFlags & GTF_REG_VAL)
+ indCallReg = call->gtCallAddr->gtRegNum;
nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
methHnd = 0;
@@ -19053,7 +19040,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
anyways.
*/
- inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
+ inst_RV_TT(INS_mov, indCallReg, call->gtCallAddr);
regTracker.rsTrackRegTrash(indCallReg);
}
@@ -19121,7 +19108,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
ilOffset, indCallReg);
if (callType == CT_INDIRECT)
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
getEmitter()->emitEnableRandomNops();
@@ -19131,15 +19118,15 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (callType == CT_INDIRECT)
{
- noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+ noway_assert(genStillAddressable(call->gtCallAddr));
- if (call->gtCall.gtCallCookie)
+ if (call->gtCallCookie)
{
//------------------------------------------------------
// Non-virtual indirect calls via the P/Invoke stub
- GenTreePtr cookie = call->gtCall.gtCallCookie;
- GenTreePtr target = call->gtCall.gtCallAddr;
+ GenTreePtr cookie = call->gtCallCookie;
+ GenTreePtr target = call->gtCallAddr;
noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
@@ -19188,8 +19175,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// Ensure that we don't trash any of these registers if we have to load
// the helper call target into a register to invoke it.
regMaskTP regsUsed;
- regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
- RBM_PINVOKE_COOKIE_PARAM,
+ regSet.rsLockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM | RBM_PINVOKE_COOKIE_PARAM,
&regsUsed);
#else
NYI("Non-virtual indirect calls via the P/Invoke stub");
@@ -19201,7 +19187,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
#if defined(_TARGET_ARM_)
- regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ regSet.rsUnlockReg(call->gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
RBM_PINVOKE_COOKIE_PARAM,
regsUsed);
#endif
@@ -19218,14 +19204,14 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (fTailCall)
{
- inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCallAddr);
regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
}
else
instEmit_indCall(call, args, retSize);
}
- genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ genDoneAddressable(call->gtCallAddr, fptrRegs, RegSet::KEEP_REG);
// Done with indirect calls
break;
@@ -19264,7 +19250,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
- if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
@@ -19362,7 +19348,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
getEmitter()->emitCurIGsize + // size of the current IG
4; // size of the jump instruction
// that we are now emitting
- if (compiler->gtIsRecursiveCall(call->AsCall()) && codeOffset <= -CALL_DIST_MAX_NEG)
+ if (compiler->gtIsRecursiveCall(call) && codeOffset <= -CALL_DIST_MAX_NEG)
{
getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
@@ -19576,7 +19562,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
// Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ if ((curArgMask & call->gtCallRegUsedMask) == 0)
continue;
#ifdef _TARGET_ARM_
@@ -19609,7 +19595,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
// Is this one of the used argument registers?
- if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ if ((curArgMask & call->gtCallRegUsedMask) == 0)
continue;
regSet.rsMaskUsed &= ~curArgMask;
@@ -19660,7 +19646,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
{
#ifdef _TARGET_ARM_
- if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
+ if (call->IsVarargs() || compiler->opts.compUseSoftFP)
{
// Result return for vararg methods is in r0, r1, but our callers would
// expect the return in s0, s1 because of floating type. Do the move now.
@@ -19680,7 +19666,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
/* The function will pop all arguments before returning */
- genStackLevel = saveStackLvl;
+ SetStackLevel(saveStackLvl);
/* No trashed registers may possibly hold a pointer at this point */
CLANG_FORMAT_COMMENT_ANCHOR;
@@ -19885,9 +19871,9 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
#ifdef _TARGET_ARM_
case TYP_STRUCT:
{
- assert(call->gtCall.gtRetClsHnd != NULL);
- assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
- int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
+ assert(call->gtRetClsHnd != NULL);
+ assert(compiler->IsHfa(call->gtRetClsHnd));
+ int retSlots = compiler->GetHfaCount(call->gtRetClsHnd);
assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
@@ -19924,7 +19910,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
if (frameListRoot)
genPInvokeCallEpilog(frameListRoot, retVal);
- if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ if (frameListRoot && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
{
if (frameListRoot->lvRegister)
{
@@ -19940,7 +19926,7 @@ regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
// check the stack as frequently as possible
&& !call->IsHelperCall()
#else
- && call->gtCall.gtCallType == CT_USER_FUNC
+ && call->gtCallType == CT_USER_FUNC
#endif
)
{
@@ -20175,12 +20161,14 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+ // We keep the call count for the second call to gcMakeRegPtrTable() below.
+ unsigned callCnt = 0;
// First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
// Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
gcInfoEncoder->FinalizeSlotIds();
// Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
gcInfoEncoder->Build();
@@ -21041,8 +21029,6 @@ regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
}
else
{
- noway_assert(pInfo->osMajor >= 5);
-
DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
threadTlsIndex -= 64;
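The hunks above also tighten the legacy-backend call helpers (genPushArgList, SetupLateArgs, genLoadIndirectCallTarget, genCodeForCall) from GenTreePtr to GenTreeCall*, which drops the gtCall field indirection at every use site. A minimal sketch of the downcast this leans on, assuming a conventional checked cast (the AsCall name appears in the hunks; the body below is illustrative, not the actual CoreCLR definition):

    // Hypothetical sketch: view a GT_CALL node through its derived call type.
    GenTreeCall* GenTree::AsCall()
    {
        assert(OperGet() == GT_CALL); // only call nodes may be viewed this way
        return reinterpret_cast<GenTreeCall*>(this);
    }

Callers that previously wrote call->gtCall.gtCallAddr on a GenTreePtr now take a GenTreeCall* parameter and read call->gtCallAddr directly.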
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 329c4a755f..c8fcd88c10 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -246,6 +246,10 @@ void CodeGen::genCodeForBBlist()
}
}
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ genInsertNopForUnwinder(block);
+#endif
+
/* Start a new code output block */
genUpdateCurrentFunclet(block);
@@ -292,7 +296,7 @@ void CodeGen::genCodeForBBlist()
/* Both stacks are always empty on entry to a basic block */
- genStackLevel = 0;
+ SetStackLevel(0);
genAdjustStackLevel(block);
savedStkLvl = genStackLevel;
@@ -486,7 +490,7 @@ void CodeGen::genCodeForBBlist()
}
}
- genStackLevel -= savedStkLvl;
+ SubtractStackLevel(savedStkLvl);
#ifdef DEBUG
// compCurLife should be equal to the liveOut set, except that we don't keep
@@ -1731,43 +1735,73 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
// pass in 'addr' for a relative call or 'base' for an indirect register call
// methHnd - optional, only used for pretty printing
// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+//
+// clang-format off
void CodeGen::genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset,
- regNumber base,
- bool isJump,
- bool isNoGC)
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ void* addr
+ X86_ARG(ssize_t argSize),
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
{
#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
#endif // !defined(_TARGET_X86_)
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ addr,
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ ilOffset, base, REG_NA, 0, 0, isJump,
emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
}
+// clang-format on
// generates an indirect call via addressing mode (call []) given an indir node
// methHnd - optional, only used for pretty printing
// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+//
+// clang-format off
void CodeGen::genEmitCall(int callType,
CORINFO_METHOD_HANDLE methHnd,
- INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- IL_OFFSETX ilOffset)
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ GenTreeIndir* indir
+ X86_ARG(ssize_t argSize),
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset)
{
#if !defined(_TARGET_X86_)
ssize_t argSize = 0;
#endif // !defined(_TARGET_X86_)
genConsumeAddress(indir->Addr());
- getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
- indir->Base() ? indir->Base()->gtRegNum : REG_NA,
- indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr,
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ ilOffset,
+ (indir->Base() != nullptr) ? indir->Base()->gtRegNum : REG_NA,
+ (indir->Index() != nullptr) ? indir->Index()->gtRegNum : REG_NA,
+ indir->Scale(),
+ indir->Offset());
}
+// clang-format on
#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index c8a5af657a..fa0c85c749 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -57,6 +57,10 @@ void genCompareInt(GenTreePtr treeNode);
#if !defined(_TARGET_64BIT_)
void genCompareLong(GenTreePtr treeNode);
+#if defined(_TARGET_ARM_)
+void genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned = false);
+void genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+#endif // defined(_TARGET_ARM_)
#endif
#ifdef FEATURE_SIMD
@@ -154,7 +158,7 @@ void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFl
void genCodeForShift(GenTreePtr tree);
-#if defined(_TARGET_X86_)
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
void genCodeForShiftLong(GenTreePtr tree);
#endif
@@ -170,6 +174,44 @@ void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
+void genAlignStackBeforeCall(GenTreePutArgStk* putArgStk);
+void genAlignStackBeforeCall(GenTreeCall* call);
+void genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias = 0);
+
+#if defined(UNIX_X86_ABI)
+
+unsigned curNestedAlignment; // Keep track of alignment adjustment required during codegen.
+unsigned maxNestedAlignment; // The maximum amount of alignment adjustment required.
+
+void SubtractNestedAlignment(unsigned adjustment)
+{
+ assert(curNestedAlignment >= adjustment);
+ unsigned newNestedAlignment = curNestedAlignment - adjustment;
+ if (curNestedAlignment != newNestedAlignment)
+ {
+ JITDUMP("Adjusting stack nested alignment from %d to %d\n", curNestedAlignment, newNestedAlignment);
+ }
+ curNestedAlignment = newNestedAlignment;
+}
+
+void AddNestedAlignment(unsigned adjustment)
+{
+ unsigned newNestedAlignment = curNestedAlignment + adjustment;
+ if (curNestedAlignment != newNestedAlignment)
+ {
+ JITDUMP("Adjusting stack nested alignment from %d to %d\n", curNestedAlignment, newNestedAlignment);
+ }
+ curNestedAlignment = newNestedAlignment;
+
+ if (curNestedAlignment > maxNestedAlignment)
+ {
+ JITDUMP("Max stack nested alignment changed from %d to %d\n", maxNestedAlignment, curNestedAlignment);
+ maxNestedAlignment = curNestedAlignment;
+ }
+}
+
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
#ifdef _TARGET_X86_
bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
@@ -179,10 +221,10 @@ void genPutArgStkFieldList(GenTreePutArgStk* putArgStk);
void genPutStructArgStk(GenTreePutArgStk* treeNode);
-int genMove8IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
-int genMove4IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
-int genMove2IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
-int genMove1IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+unsigned genMove8IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+unsigned genMove4IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+unsigned genMove2IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
+unsigned genMove1IfNeeded(unsigned size, regNumber tmpReg, GenTree* srcAddr, unsigned offset);
void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode);
void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode);
void genStoreRegToStackArg(var_types type, regNumber reg, int offset);
@@ -190,7 +232,13 @@ void genStoreRegToStackArg(var_types type, regNumber reg, int offset);
void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
-void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset);
+
+#ifdef _TARGET_ARM64_
+void genCodeForLoadPairOffset(regNumber dst, regNumber dst2, GenTree* base, unsigned offset);
+
+void genCodeForStorePairOffset(regNumber src, regNumber src2, GenTree* base, unsigned offset);
+#endif // _TARGET_ARM64_
void genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
@@ -214,7 +262,7 @@ void genStoreInd(GenTreePtr node);
bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
-void genCallInstruction(GenTreePtr call);
+void genCallInstruction(GenTreeCall* call);
void genJmpMethod(GenTreePtr jmp);
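Both back ends above stop writing genStackLevel directly and route every update through SetStackLevel, AddStackLevel, and SubtractStackLevel. A minimal sketch of what such accessors might look like, assuming they are thin wrappers over the existing field (the names come from the hunks; the bodies, including the underflow assert, are assumptions for illustration):

    // Hypothetical sketch of the stack-level accessors; not the actual CoreCLR bodies.
    void CodeGen::SetStackLevel(unsigned newStackLevel)
    {
        genStackLevel = newStackLevel;
    }

    void CodeGen::AddStackLevel(unsigned adjustment)
    {
        genStackLevel += adjustment;
    }

    void CodeGen::SubtractStackLevel(unsigned adjustment)
    {
        assert(genStackLevel >= adjustment); // never pop below an empty stack
        genStackLevel -= adjustment;
    }

Funneling the updates through one place makes it possible to add tracing or consistency checks without auditing every genStackLevel += site.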
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index e893da6035..23c2a186a4 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -241,7 +241,9 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
if ((compiler->lvaPSPSym == BAD_VAR_NUM) ||
(!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT)))
{
+#ifndef UNIX_X86_ABI
inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
+#endif // !UNIX_X86_ABI
}
else
{
@@ -1264,8 +1266,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (compiler->verbose)
{
unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
- printf("Generating: ");
- compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ compiler->gtDispLIRNode(treeNode, "Generating: ");
}
#endif // DEBUG
@@ -1313,7 +1314,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_CNS_INT:
#ifdef _TARGET_X86_
- NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
+ assert(!treeNode->IsIconHandle(GTF_ICON_TLS_HDL));
#endif // _TARGET_X86_
__fallthrough;
@@ -1624,6 +1625,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_IND:
+ {
#ifdef FEATURE_SIMD
// Handling of Vector3 type values loaded through indirection.
if (treeNode->TypeGet() == TYP_SIMD12)
@@ -1633,10 +1635,21 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
#endif // FEATURE_SIMD
- genConsumeAddress(treeNode->AsIndir()->Addr());
- emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ GenTree* addr = treeNode->AsIndir()->Addr();
+ if (addr->IsCnsIntOrI() && addr->IsIconHandle(GTF_ICON_TLS_HDL))
+ {
+ noway_assert(EA_ATTR(genTypeSize(treeNode->gtType)) == EA_PTRSIZE);
+ emit->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, treeNode->gtRegNum, FLD_GLOBAL_FS,
+ (int)addr->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ genConsumeAddress(addr);
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ }
genProduceReg(treeNode);
- break;
+ }
+ break;
case GT_MULHI:
#ifdef _TARGET_X86_
@@ -2008,7 +2021,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_CALL:
- genCallInstruction(treeNode);
+ genCallInstruction(treeNode->AsCall());
break;
case GT_JMP:
@@ -3223,7 +3236,7 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
// On x86, longTmpReg must be an xmm reg; on x64 it must be an integer register.
// This is checked by genStoreRegToStackArg.
//
-int CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
+unsigned CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcAddr, unsigned offset)
{
#ifdef _TARGET_X86_
instruction longMovIns = INS_movq;
@@ -3257,7 +3270,7 @@ int CodeGen::genMove8IfNeeded(unsigned size, regNumber longTmpReg, GenTree* srcA
// intTmpReg must be an integer register.
// This is checked by genStoreRegToStackArg.
//
-int CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+unsigned CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
{
if ((size & 4) != 0)
{
@@ -3286,7 +3299,7 @@ int CodeGen::genMove4IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAd
// intTmpReg must be an integer register.
// This is checked by genStoreRegToStackArg.
//
-int CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+unsigned CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
{
if ((size & 2) != 0)
{
@@ -3315,7 +3328,7 @@ int CodeGen::genMove2IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAd
// intTmpReg must be an integer register.
// This is checked by genStoreRegToStackArg.
//
-int CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
+unsigned CodeGen::genMove1IfNeeded(unsigned size, regNumber intTmpReg, GenTree* srcAddr, unsigned offset)
{
if ((size & 1) != 0)
@@ -3352,7 +3365,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
GenTreePtr dstAddr = putArgNode;
GenTreePtr src = putArgNode->gtOp.gtOp1;
- size_t size = putArgNode->getArgSize();
+ unsigned size = putArgNode->getArgSize();
assert(size <= CPBLK_UNROLL_LIMIT);
emitter* emit = getEmitter();
@@ -3813,6 +3826,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
regNumber targetReg = treeNode->gtRegNum;
regNumber dataReg = data->gtRegNum;
regNumber addrReg = addr->gtRegNum;
+ var_types type = genActualType(data->TypeGet());
instruction ins;
// The register allocator should have extended the lifetime of the address
@@ -3827,7 +3841,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
genConsumeOperands(treeNode);
if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
{
- inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ inst_RV_RV(ins_Copy(type), targetReg, dataReg);
data->gtRegNum = targetReg;
// TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
@@ -3853,8 +3867,8 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
// all of these nodes implicitly do an indirection on op1
// so create a temporary node to feed into the pattern matching
- GenTreeIndir i = indirForm(data->TypeGet(), addr);
- getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+ GenTreeIndir i = indirForm(type, addr);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(type), &i, data);
if (treeNode->gtRegNum != REG_NA)
{
@@ -4749,10 +4763,9 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri
}
// Produce code for a GT_CALL node
-void CodeGen::genCallInstruction(GenTreePtr node)
+void CodeGen::genCallInstruction(GenTreeCall* call)
{
- GenTreeCall* call = node->AsCall();
- assert(call->gtOper == GT_CALL);
+ genAlignStackBeforeCall(call);
gtCallTypes callType = (gtCallTypes)call->gtCallType;
@@ -4913,7 +4926,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (callType == CT_INDIRECT)
{
assert(target == nullptr);
- target = call->gtCall.gtCallAddr;
+ target = call->gtCallAddr;
methHnd = nullptr;
}
else
@@ -4993,16 +5006,30 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
#if defined(_TARGET_X86_)
+ bool fCallerPop = (call->gtFlags & GTF_CALL_POP_ARGS) != 0;
+
+#ifdef UNIX_X86_ABI
+ {
+ CorInfoCallConv callConv = CORINFO_CALLCONV_DEFAULT;
+
+ if ((callType != CT_HELPER) && call->callSig)
+ {
+ callConv = call->callSig->callConv;
+ }
+
+ fCallerPop |= IsCallerPop(callConv);
+ }
+#endif // UNIX_X86_ABI
+
// If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
// adjust its stack level accordingly.
// If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
// pop when we're done.
ssize_t argSizeForEmitter = stackArgBytes;
- if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
+ if (fCallerPop)
{
argSizeForEmitter = -stackArgBytes;
}
-
#endif // defined(_TARGET_X86_)
#ifdef FEATURE_AVX_SUPPORT
@@ -5044,11 +5071,20 @@ void CodeGen::genCallInstruction(GenTreePtr node)
genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
getEmitter()->emitIns_Nop(3);
- getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSizeForEmitter,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+
+ // clang-format off
+ getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD),
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr,
+ argSizeForEmitter,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
ilOffset, REG_VIRTUAL_STUB_TARGET, REG_NA, 1, 0);
+ // clang-format on
}
else
#endif
@@ -5060,18 +5096,29 @@ void CodeGen::genCallInstruction(GenTreePtr node)
// contained only if it can be encoded as PC-relative offset.
assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
- genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
- ->Base()
- ->AsIntConCommon()
- ->IconValue() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ // clang-format off
+ genEmitCall(emitter::EC_FUNC_TOKEN_INDIR,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue()
+ X86_ARG(argSizeForEmitter),
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ ilOffset);
+ // clang-format on
}
else
{
- genEmitCall(emitter::EC_INDIR_ARD, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ // clang-format off
+ genEmitCall(emitter::EC_INDIR_ARD,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ target->AsIndir()
+ X86_ARG(argSizeForEmitter),
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ ilOffset);
+ // clang-format on
}
}
else
@@ -5079,19 +5126,34 @@ void CodeGen::genCallInstruction(GenTreePtr node)
// We have already generated code for gtControlExpr evaluating it into a register.
// We just need to emit "call reg" in this case.
assert(genIsValidIntReg(target->gtRegNum));
- genEmitCall(emitter::EC_INDIR_R, methHnd,
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
+
+ // clang-format off
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr // addr
X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ ilOffset,
+ genConsumeReg(target));
+ // clang-format on
}
}
#ifdef FEATURE_READYTORUN_COMPILER
else if (call->gtEntryPoint.addr != nullptr)
{
+ // clang-format off
genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
: emitter::EC_FUNC_TOKEN_INDIR,
- methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ (void*) call->gtEntryPoint.addr
+ X86_ARG(argSizeForEmitter),
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ ilOffset);
+ // clang-format on
}
#endif
else
@@ -5127,18 +5189,18 @@ void CodeGen::genCallInstruction(GenTreePtr node)
}
// Non-virtual direct calls to known addresses
- genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
- }
-#if defined(UNIX_X86_ABI)
- // Put back the stack pointer if there was any padding for stack alignment
- unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
- if (padStackAlign != 0)
- {
- inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
+ // clang-format off
+ genEmitCall(emitter::EC_FUNC_TOKEN,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ addr
+ X86_ARG(argSizeForEmitter),
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ ilOffset);
+ // clang-format on
}
-#endif // UNIX_X86_ABI
// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
@@ -5148,11 +5210,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
genPendingCallLabel = nullptr;
}
-#if defined(_TARGET_X86_)
- // The call will pop its arguments.
- genStackLevel -= stackArgBytes;
-#endif // defined(_TARGET_X86_)
-
// Update GC info:
// All Callee arg registers are trashed and no longer contain any GC pointers.
// TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
@@ -5253,7 +5310,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
gcInfo.gcMarkRegSetNpt(RBM_INTRET);
}
-#if defined(_TARGET_X86_)
+#if !FEATURE_EH_FUNCLETS
//-------------------------------------------------------------------------
// Create a label for tracking of region protected by the monitor in synchronized methods.
// This needs to be here, rather than above where fPossibleSyncHelperCall is set,
@@ -5281,13 +5338,21 @@ void CodeGen::genCallInstruction(GenTreePtr node)
break;
}
}
+#endif // !FEATURE_EH_FUNCLETS
+
+ unsigned stackAdjustBias = 0;
+#if defined(_TARGET_X86_)
// Is the caller supposed to pop the arguments?
- if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
+ if (fCallerPop && (stackArgBytes != 0))
{
- genAdjustSP(stackArgBytes);
+ stackAdjustBias = stackArgBytes;
}
+
+ SubtractStackLevel(stackArgBytes);
#endif // _TARGET_X86_
+
+ genRemoveAlignmentAfterCall(call, stackAdjustBias);
}
// Produce code for a GT_JMP node.
@@ -7137,7 +7202,7 @@ int CodeGenInterface::genSPtoFPdelta()
{
int delta;
-#ifdef PLATFORM_UNIX
+#ifdef UNIX_AMD64_ABI
// We require frame chaining on Unix to support native tool unwinding (such as
// unwinding by the native debugger). We have a CLR-only extension to the
@@ -7145,7 +7210,7 @@ int CodeGenInterface::genSPtoFPdelta()
// If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
delta = genTotalFrameSize();
-#else // !PLATFORM_UNIX
+#else // !UNIX_AMD64_ABI
// As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
// RBP needs to be reported in unwind codes. This case would arise for methods
@@ -7171,7 +7236,7 @@ int CodeGenInterface::genSPtoFPdelta()
delta = genTotalFrameSize();
}
-#endif // !PLATFORM_UNIX
+#endif // !UNIX_AMD64_ABI
return delta;
}
@@ -7372,11 +7437,16 @@ void CodeGen::genIntrinsic(GenTreePtr treeNode)
switch (treeNode->gtIntrinsic.gtIntrinsicId)
{
case CORINFO_INTRINSIC_Sqrt:
- noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
+ {
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
genConsumeOperands(treeNode->AsOp());
- getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
- treeNode->gtOp.gtOp1);
+ getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, srcNode);
break;
+ }
case CORINFO_INTRINSIC_Abs:
genSSE2BitwiseOp(treeNode);
@@ -7415,16 +7485,10 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
unsigned baseVarNum;
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
-#else
- const bool putInIncomingArgArea = false;
-#endif
-
// Whether to set up the stk arg in the incoming or out-going arg area?
// Fast tail calls implemented as epilog+jmp = stk arg is set up in the incoming arg area.
// All other calls - stk arg is set up in the out-going arg area.
- if (putInIncomingArgArea)
+ if (treeNode->AsPutArgStk()->putInIncomingArgArea())
{
// See the note in the function header re: finding the first stack passed argument.
baseVarNum = getFirstArgWithStackSlot();
@@ -7461,7 +7525,96 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
return baseVarNum;
}
+//---------------------------------------------------------------------
+// genAlignStackBeforeCall: Align the stack if necessary before a call.
+//
+// Arguments:
+// putArgStk - the putArgStk node.
+//
+void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk)
+{
+#if defined(UNIX_X86_ABI)
+
+ genAlignStackBeforeCall(putArgStk->gtCall);
+
+#endif // UNIX_X86_ABI
+}
+
+//---------------------------------------------------------------------
+// genAlignStackBeforeCall: Align the stack if necessary before a call.
+//
+// Arguments:
+// call - the call node.
+//
+void CodeGen::genAlignStackBeforeCall(GenTreeCall* call)
+{
+#if defined(UNIX_X86_ABI)
+
+ // Have we aligned the stack yet?
+ if (!call->fgArgInfo->IsStkAlignmentDone())
+ {
+ // We haven't done any stack alignment yet for this call. We might need to create
+ // an alignment adjustment, even if this call itself doesn't have any stack args.
+ // This can happen if this function call is part of a nested call sequence, and the outer
+ // call has already pushed some arguments.
+
+ unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes();
+ call->fgArgInfo->ComputeStackAlignment(stkLevel);
+
+ unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
+ if (padStkAlign != 0)
+ {
+ // Now generate the alignment
+ inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE);
+ AddStackLevel(padStkAlign);
+ AddNestedAlignment(padStkAlign);
+ }
+
+ call->fgArgInfo->SetStkAlignmentDone();
+ }
+
+#endif // UNIX_X86_ABI
+}
+
+//---------------------------------------------------------------------
+// genRemoveAlignmentAfterCall: After a call, remove the alignment
+// added before the call, if any.
+//
+// Arguments:
+// call - the call node.
+// bias - additional stack adjustment
+//
+// Note:
+// When bias > 0, the caller is responsible for adjusting the stack level itself,
+// since the bias is not included in the stack level adjustment done here.
+//
+void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call, unsigned bias)
+{
+#if defined(_TARGET_X86_)
+#if defined(UNIX_X86_ABI)
+ // Put back the stack pointer if there was any padding for stack alignment
+ unsigned padStkAlign = call->fgArgInfo->GetStkAlign();
+ unsigned padStkAdjust = padStkAlign + bias;
+
+ if (padStkAdjust != 0)
+ {
+ inst_RV_IV(INS_add, REG_SPBASE, padStkAdjust, EA_PTRSIZE);
+ SubtractStackLevel(padStkAlign);
+ SubtractNestedAlignment(padStkAlign);
+ }
+#else // UNIX_X86_ABI
+ if (bias != 0)
+ {
+ genAdjustSP(bias);
+ }
+#endif // !UNIX_X86_ABI
+#else // _TARGET_X86_
+ assert(bias == 0);
+#endif // !_TARGET_X86_
+}
+
#ifdef _TARGET_X86_
+
//---------------------------------------------------------------------
// genAdjustStackForPutArgStk:
// adjust the stack pointer for a putArgStk node if necessary.
@@ -7484,7 +7637,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
const unsigned argSize = genTypeSize(putArgStk);
inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
- genStackLevel += argSize;
+ AddStackLevel(argSize);
m_pushStkArg = false;
return true;
}
@@ -7528,7 +7681,7 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
m_pushStkArg = false;
inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE);
- genStackLevel += argSize;
+ AddStackLevel(argSize);
return true;
}
}
@@ -7616,7 +7769,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
inst_IV(INS_push, 0);
currentOffset -= pushSize;
- genStackLevel += pushSize;
+ AddStackLevel(pushSize);
adjustment -= pushSize;
}
m_pushStkArg = true;
@@ -7638,7 +7791,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// Adjust the stack pointer to the next slot boundary.
inst_RV_IV(INS_sub, REG_SPBASE, adjustment, EA_PTRSIZE);
currentOffset -= adjustment;
- genStackLevel += adjustment;
+ AddStackLevel(adjustment);
}
// Does it need to be in a byte register?
@@ -7691,7 +7844,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
}
currentOffset -= TARGET_POINTER_SIZE;
- genStackLevel += TARGET_POINTER_SIZE;
+ AddStackLevel(TARGET_POINTER_SIZE);
}
else
{
@@ -7713,14 +7866,14 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
else
{
-#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+#if defined(FEATURE_SIMD)
if (fieldType == TYP_SIMD12)
{
assert(genIsValidFloatReg(simdTmpReg));
genStoreSIMD12ToStack(argReg, simdTmpReg);
}
else
-#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+#endif // defined(FEATURE_SIMD)
{
genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
}
@@ -7737,7 +7890,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
// We don't expect padding at the beginning of a struct, but it could happen with explicit layout.
inst_RV_IV(INS_sub, REG_SPBASE, currentOffset, EA_PTRSIZE);
- genStackLevel += currentOffset;
+ AddStackLevel(currentOffset);
}
}
#endif // _TARGET_X86_
@@ -7758,15 +7911,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
-#if defined(UNIX_X86_ABI)
- // For each call, first stack argument has the padding for alignment
- // if this value is not zero, use it to adjust the ESP
- unsigned argPadding = putArgStk->getArgPadding();
- if (argPadding != 0)
- {
- inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
- }
-#endif
+ genAlignStackBeforeCall(putArgStk);
if (varTypeIsStruct(targetType))
{
@@ -7797,7 +7942,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
{
inst_IV(INS_push, data->gtIntCon.gtIconVal);
}
- genStackLevel += argSize;
+ AddStackLevel(argSize);
}
else if (data->OperGet() == GT_FIELD_LIST)
{
@@ -7896,7 +8041,7 @@ void CodeGen::genPushReg(var_types type, regNumber srcReg)
inst_RV_IV(INS_sub, REG_SPBASE, size, EA_PTRSIZE);
getEmitter()->emitIns_AR_R(ins, attr, srcReg, REG_SPBASE, 0);
}
- genStackLevel += size;
+ AddStackLevel(size);
}
#endif // _TARGET_X86_
@@ -8094,7 +8239,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
- genStackLevel += TARGET_POINTER_SIZE;
+ AddStackLevel(TARGET_POINTER_SIZE);
}
#else // !defined(_TARGET_X86_)
@@ -8354,12 +8499,14 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
// Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+ // We keep the call count for the second call to gcMakeRegPtrTable() below.
+ unsigned callCnt = 0;
// First we figure out the encoder ID's for the stack slots and registers.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS, &callCnt);
// Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
gcInfoEncoder->FinalizeSlotIds();
// Now we can actually use those slot ID's to declare live ranges.
- gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK, &callCnt);
if (compiler->opts.compDbgEnC)
{
@@ -8466,14 +8613,22 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
}
}
- getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
- retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
- gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ // clang-format off
+ getEmitter()->emitIns_Call(callType,
+ compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr,
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(EA_UNKNOWN),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
BAD_IL_OFFSET, // IL offset
callTarget, // ireg
REG_NA, 0, 0, // xreg, xmul, disp
false, // isJump
emitter::emitNoGChelper(helper));
+ // clang-format on
regTracker.rsTrashRegSet(killMask);
regTracker.rsTrashRegsForGCInterruptability();
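The UNIX_X86_ABI alignment helpers introduced above are meant to bracket each call: pad ESP before the outgoing arguments are stored, then undo the pad (plus any caller-pop bias) in a single adjustment afterwards. A condensed sketch of that pairing, using only names that appear in the hunks; the argument evaluation and call emission in between are elided:

    // Illustrative condensation; the real genCallInstruction above does much more.
    void CodeGen::genCallInstruction(GenTreeCall* call)
    {
        genAlignStackBeforeCall(call); // UNIX_X86_ABI: sub esp, pad; AddStackLevel/AddNestedAlignment(pad)

        // ... evaluate arguments, compute fCallerPop and stackArgBytes, emit the call ...

        unsigned stackAdjustBias = 0;
        if (fCallerPop && (stackArgBytes != 0)) // x86: the caller removes its own arguments
        {
            stackAdjustBias = stackArgBytes; // folded into the post-call "add esp, N"
        }
        SubtractStackLevel(stackArgBytes);

        genRemoveAlignmentAfterCall(call, stackAdjustBias); // add esp, padStkAlign + bias
    }

Tracking curNestedAlignment/maxNestedAlignment alongside this lets nested call sequences (a call evaluated as an argument of another call) account for the outer pad when computing the inner one.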
diff --git a/src/jit/compatjit/CMakeLists.txt b/src/jit/compatjit/CMakeLists.txt
deleted file mode 100644
index 1e0615e431..0000000000
--- a/src/jit/compatjit/CMakeLists.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-project(compatjit)
-
-# This compatjit.dll is only built if we are not building JIT32 as compatjit.dll.
-# It is the same build as legacyjit.dll, just with a different name, and not
-# built as an altjit.
-
-add_definitions(-DLEGACY_BACKEND)
-
-add_definitions(-DFEATURE_NO_HOST)
-add_definitions(-DSELF_NO_HOST)
-add_definitions(-DFEATURE_READYTORUN_COMPILER)
-remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
-
-# No SIMD in legacy back-end.
-remove_definitions(-DFEATURE_SIMD)
-remove_definitions(-DFEATURE_AVX_SUPPORT)
-
-if(WIN32)
- add_definitions(-DFX_VER_INTERNALNAME_STR=compatjit.dll)
-endif(WIN32)
-
-add_library_clr(compatjit
- SHARED
- ${SHARED_LIB_SOURCES}
-)
-
-add_dependencies(compatjit jit_exports)
-
-set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
-set_property(TARGET compatjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
-
-set(RYUJIT_LINK_LIBRARIES
- utilcodestaticnohost
- gcinfo
-)
-
-if(CLR_CMAKE_PLATFORM_UNIX)
- list(APPEND RYUJIT_LINK_LIBRARIES
- mscorrc_debug
- coreclrpal
- palrt
- )
-else()
- list(APPEND RYUJIT_LINK_LIBRARIES
- ${STATIC_MT_CRT_LIB}
- ${STATIC_MT_VCRT_LIB}
- kernel32.lib
- advapi32.lib
- ole32.lib
- oleaut32.lib
- uuid.lib
- user32.lib
- version.lib
- shlwapi.lib
- bcrypt.lib
- crypt32.lib
- RuntimeObject.lib
- )
-endif(CLR_CMAKE_PLATFORM_UNIX)
-
-target_link_libraries(compatjit
- ${RYUJIT_LINK_LIBRARIES}
-)
-
-# add the install targets
-install_clr(compatjit)
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 01c7f8d6a7..14b2abafb6 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -73,9 +73,9 @@ inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
inline bool _our_GetThreadCycles(unsigned __int64* cycleOut)
{
- uint64_t cycles;
- asm volatile("rdtsc" : "=A"(cycles));
- *cycleOut = cycles;
+ uint32_t hi, lo;
+ __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
+ *cycleOut = (static_cast<unsigned __int64>(hi) << 32) | static_cast<unsigned __int64>(lo);
return true;
}
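
The old "=A" constraint only names the edx:eax pair on 32-bit targets; with a 64-bit output on x86-64 it can silently drop the high half, which is why the hunk above captures EAX and EDX separately and recombines them. A standalone sketch of the same idiom (GCC/Clang, x86/x86-64 only):

#include <cstdint>

// Read the time-stamp counter: RDTSC places the low 32 bits in EAX and the
// high 32 bits in EDX, so read both halves and recombine them into 64 bits.
static inline uint64_t ReadTimeStampCounter()
{
    uint32_t hi, lo;
    __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
    return (static_cast<uint64_t>(hi) << 32) | lo;
}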
@@ -2296,7 +2296,6 @@ void Compiler::compSetProcessor()
#ifdef FEATURE_AVX_SUPPORT
// COMPlus_EnableAVX can be used to disable using AVX if available on a target machine.
- // Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit
opts.compCanUseAVX = false;
if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
@@ -2471,7 +2470,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.jitFlags = jitFlags;
opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
- if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT))
+ if (jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_CODE) || jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT) ||
+ jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
{
opts.compFlags = CLFLG_MINOPT;
}
@@ -2496,7 +2496,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
//
// If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
//
- else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT))
+ else if (jitFlags->IsSet(JitFlags::JIT_FLAG_SPEED_OPT) ||
+ (jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1) && !jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT)))
{
opts.compCodeOpt = FAST_CODE;
assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_SIZE_OPT));
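
Taken together, the two hunks above fold the new tiering flags into the existing opt-level selection: TIER0 is treated like MinOpts, and TIER1 is treated like SPEED_OPT unless MinOpts was forced. A hypothetical condensation of just those two predicates (not the actual compInitOptions code; the boolean parameters stand in for jitFlags->IsSet(...) queries):

// Hypothetical condensation of the two conditions shown above.
bool UseMinOpts(bool debugCode, bool minOpt, bool tier0)
{
    return debugCode || minOpt || tier0; // Tier 0 compiles with minimal optimization
}

bool UseFastCode(bool speedOpt, bool tier1, bool minOpt)
{
    return speedOpt || (tier1 && !minOpt); // Tier 1 optimizes for speed unless MinOpts won
}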
@@ -3031,13 +3032,31 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
setUsesSIMDTypes(false);
#endif // FEATURE_SIMD
- if (compIsForInlining() || compIsForImportOnly())
+ if (compIsForImportOnly())
{
return;
}
+
+#if FEATURE_TAILCALL_OPT
+ // By default opportunistic tail call optimization is enabled.
+ // Recognition is done in the importer so this must be set for
+ // inlinees as well.
+ opts.compTailCallOpt = true;
+#endif // FEATURE_TAILCALL_OPT
+
+ if (compIsForInlining())
+ {
+ return;
+ }
+
// The rest of the opts fields that we initialize here
// should only be used when we generate code for the method
// They should not be used when importing or inlining
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_TAILCALL_OPT
+ opts.compTailCallLoopOpt = true;
+#endif // FEATURE_TAILCALL_OPT
opts.genFPorder = true;
opts.genFPopt = true;
@@ -3045,12 +3064,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.instrCount = 0;
opts.lvRefCount = 0;
-#if FEATURE_TAILCALL_OPT
- // By default opportunistic tail call optimization is enabled
- opts.compTailCallOpt = true;
- opts.compTailCallLoopOpt = true;
-#endif
-
#ifdef PROFILING_SUPPORTED
opts.compJitELTHookEnabled = false;
#endif // PROFILING_SUPPORTED
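
The net effect of the moves above is an ordering constraint: compTailCallOpt must be set before the early return for inlinees, because the importer consults it, while compTailCallLoopOpt stays a codegen-only option. A condensed sketch of that control flow (illustrative; the struct, helper name, and isInlinee parameter are hypothetical):

// Illustrative sketch of the intended ordering, not the actual function.
struct Options
{
    bool compTailCallOpt     = false;
    bool compTailCallLoopOpt = false;
};

void InitTailCallOptions(Options& opts, bool isInlinee)
{
    opts.compTailCallOpt = true;     // recognized in the importer, so inlinees need it too
    if (isInlinee)
    {
        return;                      // everything below is codegen-only
    }
    opts.compTailCallLoopOpt = true; // tail-call-to-loop conversion is decided later
}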
@@ -3308,11 +3321,9 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
info.compMethodInfo->args.pSig);
#endif
-//-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
-#if RELOC_SUPPORT
opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC);
-#endif
#ifdef DEBUG
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
@@ -4444,7 +4455,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
bool doRangeAnalysis = true;
int iterations = 1;
-#ifdef DEBUG
+#if defined(OPT_CONFIG)
doSsa = (JitConfig.JitDoSsa() != 0);
doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
@@ -4457,7 +4468,7 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
{
iterations = JitConfig.JitOptRepeatCount();
}
-#endif
+#endif // defined(OPT_CONFIG)
while (iterations > 0)
{
@@ -4978,17 +4989,13 @@ int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
// with an ARM-targeting "altjit").
info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
-#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
- // ToDo: This code is to allow us to run UNIX codegen on Windows for now. Remove when appropriate.
- // Make sure that the generated UNIX altjit code is skipped on Windows. The static jit codegen is used to run.
+#if (defined(_TARGET_UNIX_) && !defined(_HOST_UNIX_)) || (!defined(_TARGET_UNIX_) && defined(_HOST_UNIX_))
+ // The host and target platforms don't match. This info isn't handled by the existing
+ // getExpectedTargetArchitecture() JIT-EE interface method.
info.compMatchedVM = false;
-#endif // UNIX_AMD64_ABI
+#endif
-#if COR_JIT_EE_VERSION > 460
compMaxUncheckedOffsetForNullObject = eeGetEEInfo()->maxUncheckedOffsetForNullObject;
-#else // COR_JIT_EE_VERSION <= 460
- compMaxUncheckedOffsetForNullObject = MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT;
-#endif // COR_JIT_EE_VERSION > 460
// Set the context for token lookup.
if (compIsForInlining())
@@ -5406,7 +5413,7 @@ void Compiler::compCompileFinish()
{
if (compJitHaltMethod())
{
-#if !defined(_TARGET_ARM64_) && !defined(PLATFORM_UNIX)
+#if !defined(_TARGET_ARM64_) && !defined(_HOST_UNIX_)
// TODO-ARM64-NYI: re-enable this when we have an OS that supports a pop-up dialog
// Don't do an assert, but just put up the dialog box so we get just-in-time debugger
@@ -5651,12 +5658,6 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
info.compCallUnmanaged = 0;
info.compLvFrameListRoot = BAD_VAR_NUM;
-#if FEATURE_FIXED_OUT_ARGS
- lvaOutgoingArgSpaceSize = 0;
-#endif
-
- lvaGenericsContextUsed = false;
-
info.compInitMem = ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0);
/* Allocate the local variable table */
@@ -7206,29 +7207,34 @@ double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond();
#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS || defined(FEATURE_TRACELOGGING)
const char* PhaseNames[] = {
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) string_nm,
#include "compphases.h"
};
const char* PhaseEnums[] = {
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) #enum_nm,
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) #enum_nm,
#include "compphases.h"
};
const LPCWSTR PhaseShortNames[] = {
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) W(short_nm),
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) W(short_nm),
#include "compphases.h"
};
#endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
#ifdef FEATURE_JIT_METHOD_PERF
bool PhaseHasChildren[] = {
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) hasChildren,
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) hasChildren,
#include "compphases.h"
};
int PhaseParent[] = {
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) parent,
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) parent,
+#include "compphases.h"
+};
+
+bool PhaseReportsIRSize[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) measureIR,
#include "compphases.h"
};
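
The new PhaseReportsIRSize[] table above follows the same X-macro scheme as its siblings: each array redefines CompPhaseNameMacro to pick out one column and then re-includes compphases.h. A standalone miniature of the pattern, using a hypothetical inline phase list instead of the real header:

// Miniature of the X-macro pattern (hypothetical phase list, not compphases.h).
#define PHASE_LIST(M)                              \
    M(PHASE_IMPORTATION, "Importation", true)      \
    M(PHASE_MORPH,       "Morph",       true)      \
    M(PHASE_EMIT,        "Emit code",   false)

enum MiniPhases
{
#define M(enum_nm, string_nm, measureIR) enum_nm,
    PHASE_LIST(M)
#undef M
    MINI_PHASE_COUNT
};

const char* MiniPhaseNames[] = {
#define M(enum_nm, string_nm, measureIR) string_nm,
    PHASE_LIST(M)
#undef M
};

const bool MiniPhaseReportsIRSize[] = {
#define M(enum_nm, string_nm, measureIR) measureIR,
    PHASE_LIST(M)
#undef M
};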
@@ -7636,7 +7642,7 @@ JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
}
}
-void JitTimer::EndPhase(Phases phase)
+void JitTimer::EndPhase(Compiler* compiler, Phases phase)
{
// Otherwise...
// We re-run some phases currently, so this following assert doesn't work.
@@ -7687,6 +7693,15 @@ void JitTimer::EndPhase(Phases phase)
m_curPhaseStart = threadCurCycles;
}
}
+
+ if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[phase])
+ {
+ m_info.m_nodeCountAfterPhase[phase] = compiler->fgMeasureIR();
+ }
+ else
+ {
+ m_info.m_nodeCountAfterPhase[phase] = 0;
+ }
}
#ifdef DEBUG
@@ -7795,6 +7810,9 @@ void JitTimer::PrintCsvHeader()
FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
if (fp != nullptr)
{
+ // Seek to the end of the file s.t. `ftell` doesn't lie to us on Windows
+ fseek(fp, 0, SEEK_END);
+
// Write the header if the file is empty
if (ftell(fp) == 0)
{
@@ -7808,10 +7826,17 @@ void JitTimer::PrintCsvHeader()
for (int i = 0; i < PHASE_NUMBER_OF; i++)
{
fprintf(fp, "\"%s\",", PhaseNames[i]);
+ if (PhaseReportsIRSize[i])
+ {
+ fprintf(fp, "\"Node Count After %s\",", PhaseNames[i]);
+ }
}
InlineStrategy::DumpCsvHeader(fp);
+ fprintf(fp, "\"Executable Code Bytes\",");
+ fprintf(fp, "\"GC Info Bytes\",");
+ fprintf(fp, "\"Total Bytes Allocated\",");
fprintf(fp, "\"Total Cycles\",");
fprintf(fp, "\"CPS\"\n");
}
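
The fseek added above matters because a file opened for append can report position 0 from ftell until the first write on Windows CRTs, so the "write the header only if the file is empty" check needs an explicit seek to the end first. A minimal sketch of the pattern (hypothetical path and columns):

#include <cstdio>

// Append a CSV row, emitting the header only when the file is brand new.
void AppendCsvRow(const char* path, const char* row)
{
    FILE* fp = fopen(path, "a");
    if (fp == nullptr)
    {
        return;
    }
    fseek(fp, 0, SEEK_END); // make ftell() reflect the true size in append mode
    if (ftell(fp) == 0)
    {
        fprintf(fp, "\"Method\",\"Total Cycles\",\"CPS\"\n");
    }
    fprintf(fp, "%s\n", row);
    fclose(fp);
}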
@@ -7858,10 +7883,18 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
totCycles += m_info.m_cyclesByPhase[i];
}
fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
+
+ if (PhaseReportsIRSize[i])
+ {
+ fprintf(fp, "%u,", m_info.m_nodeCountAfterPhase[i]);
+ }
}
comp->m_inlineStrategy->DumpCsvData(fp);
+ fprintf(fp, "%Iu,", comp->info.compNativeCodeSize);
+ fprintf(fp, "%Iu,", comp->compInfoBlkSize);
+ fprintf(fp, "%Iu,", comp->compGetAllocator()->getTotalBytesAllocated());
fprintf(fp, "%I64u,", m_info.m_totalCycles);
fprintf(fp, "%f\n", CycleTimer::CyclesPerSecond());
fclose(fp);
@@ -8126,11 +8159,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
* The versions that start with 'd' use the tlsCompiler, so don't require a Compiler*.
*
* Summary:
- * cBlock, dBlock : Display a basic block (call fgDispBasicBlock()).
+ * cBlock, dBlock : Display a basic block (call fgTableDispBasicBlock()).
* cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
* cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
* "V" means "verbose", and will dump all the trees.
* cTree, dTree : Display a tree (call gtDispTree()).
+ * cTreeLIR, dTreeLIR : Display a tree in LIR form (call gtDispLIRNode()).
* cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
* cEH, dEH : Display the EH handler table (call fgDispHandlerTab()).
* cVar, dVar : Display a local variable given its number (call lvaDumpEntry()).
@@ -8200,6 +8234,13 @@ void cTree(Compiler* comp, GenTree* tree)
comp->gtDispTree(tree, nullptr, ">>>");
}
+void cTreeLIR(Compiler* comp, GenTree* tree)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *TreeLIR %u\n", sequenceNumber++);
+ comp->gtDispLIRNode(tree);
+}
+
void cTrees(Compiler* comp)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
@@ -8314,6 +8355,11 @@ void dTree(GenTree* tree)
cTree(JitTls::GetCompiler(), tree);
}
+void dTreeLIR(GenTree* tree)
+{
+ cTreeLIR(JitTls::GetCompiler(), tree);
+}
+
void dTrees()
{
cTrees(JitTls::GetCompiler());
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 4239cf613b..9ca0e1a3e1 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -252,8 +252,10 @@ public:
unsigned char lvStackByref : 1; // This is a compiler temporary of TYP_BYREF that is known to point into our local
// stack frame.
- unsigned char lvArgWrite : 1; // variable is a parameter and STARG was used on it
- unsigned char lvIsTemp : 1; // Short-lifetime compiler temp
+ unsigned char lvHasILStoreOp : 1; // there is at least one STLOC or STARG on this local
+ unsigned char lvHasMultipleILStoreOp : 1; // there is more than one STLOC on this local
+
+ unsigned char lvIsTemp : 1; // Short-lifetime compiler temp
#if OPT_BOOL_OPS
unsigned char lvIsBoolean : 1; // set if variable is boolean
#endif
@@ -322,6 +324,12 @@ public:
#endif // FEATURE_SIMD
unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
+ unsigned char lvClassIsExact : 1; // lvClassHandle is the exact type
+
+#ifdef DEBUG
+ unsigned char lvClassInfoUpdated : 1; // true if this var has updated class handle or exactness
+#endif
+
union {
unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct
// local.
@@ -704,6 +712,8 @@ public:
typeInfo lvVerTypeInfo; // type info needed for verification
+ CORINFO_CLASS_HANDLE lvClassHnd; // class handle for the local, or null if not known
+
BYTE* lvGcLayout; // GC layout info for structs
#if ASSERTION_PROP
@@ -917,7 +927,7 @@ struct ArrayInfo
// partition a compilation.
enum Phases
{
-#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) enum_nm,
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent, measureIR) enum_nm,
#include "compphases.h"
PHASE_NUMBER_OF
};
@@ -952,6 +962,7 @@ struct CompTimeInfo
static bool PhaseHasChildren[];
static int PhaseParent[];
+ static bool PhaseReportsIRSize[];
unsigned m_byteCodeBytes;
unsigned __int64 m_totalCycles;
@@ -961,6 +972,9 @@ struct CompTimeInfo
unsigned __int64 m_CLRinvokesByPhase[PHASE_NUMBER_OF];
unsigned __int64 m_CLRcyclesByPhase[PHASE_NUMBER_OF];
#endif
+
+ unsigned m_nodeCountAfterPhase[PHASE_NUMBER_OF];
+
// For better documentation, we call EndPhase on
// non-leaf phases. We should also call EndPhase on the
// last leaf subphase; obviously, the elapsed cycles between the EndPhase
@@ -1077,7 +1091,7 @@ public:
static void PrintCsvHeader();
// Ends the current phase (argument is for a redundant check).
- void EndPhase(Phases phase);
+ void EndPhase(Compiler* compiler, Phases phase);
#if MEASURE_CLRAPI_CALLS
// Start and end a timed CLR API call.
@@ -1186,11 +1200,6 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
-#if defined(UNIX_X86_ABI)
- unsigned padStkAlign; // Count of number of padding slots for stack alignment. For each Call, only the first
- // argument may have a value to emit "sub esp, n" to adjust the stack before pushing
- // the argument.
-#endif
bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
@@ -1263,14 +1272,23 @@ typedef struct fgArgTabEntry* fgArgTabEntryPtr;
class fgArgInfo
{
- Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
- GenTreePtr callTree; // Back pointer to the GT_CALL node for this fgArgInfo
- unsigned argCount; // Updatable arg count value
- unsigned nextSlotNum; // Updatable slot count value
- unsigned stkLevel; // Stack depth when we make this call (for x86)
+ Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
+ GenTreeCall* callTree; // Back pointer to the GT_CALL node for this fgArgInfo
+ unsigned argCount; // Updatable arg count value
+ unsigned nextSlotNum; // Updatable slot count value
+ unsigned stkLevel; // Stack depth when we make this call (for x86)
+
#if defined(UNIX_X86_ABI)
- unsigned padStkAlign; // Count of number of padding slots for stack alignment. This value is used to turn back
- // stack pointer before it was adjusted after each Call
+ bool alignmentDone; // Updateable flag, set to 'true' after we've done any required alignment.
+ unsigned stkSizeBytes; // Size of stack used by this call, in bytes. Calculated during fgMorphArgs().
+ unsigned padStkAlign; // Stack alignment in bytes required before arguments are pushed for this call.
+ // Computed dynamically during codegen, based on stkSizeBytes and the current
+ // stack level (genStackLevel) when the first stack adjustment is made for
+ // this call.
+#endif
+
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned outArgSize; // Size of the out arg area for the call, will be at least MIN_ARG_AREA_FOR_CALL
#endif
unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
@@ -1284,8 +1302,8 @@ private:
void AddArg(fgArgTabEntryPtr curArgTabEntry);
public:
- fgArgInfo(Compiler* comp, GenTreePtr call, unsigned argCount);
- fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall);
+ fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned argCount);
+ fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall);
fgArgTabEntryPtr AddRegArg(
unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment);
@@ -1321,10 +1339,6 @@ public:
void ArgsComplete();
-#if defined(UNIX_X86_ABI)
- void ArgsAlignPadding();
-#endif
-
void SortArgs();
void EvalArgsToTemps();
@@ -1344,12 +1358,6 @@ public:
{
return nextSlotNum;
}
-#if defined(UNIX_X86_ABI)
- unsigned GetPadStackAlign()
- {
- return padStkAlign;
- }
-#endif
bool HasRegArgs()
{
return hasRegArgs;
@@ -1362,6 +1370,49 @@ public:
{
return argsComplete;
}
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned GetOutArgSize() const
+ {
+ return outArgSize;
+ }
+ void SetOutArgSize(unsigned newVal)
+ {
+ outArgSize = newVal;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ void ComputeStackAlignment(unsigned curStackLevelInBytes)
+ {
+#if defined(UNIX_X86_ABI)
+ padStkAlign = AlignmentPad(curStackLevelInBytes, STACK_ALIGN);
+#endif // defined(UNIX_X86_ABI)
+ }
+
+ void SetStkSizeBytes(unsigned newStkSizeBytes)
+ {
+#if defined(UNIX_X86_ABI)
+ stkSizeBytes = newStkSizeBytes;
+#endif // defined(UNIX_X86_ABI)
+ }
+
+#if defined(UNIX_X86_ABI)
+ unsigned GetStkAlign()
+ {
+ return padStkAlign;
+ }
+ unsigned GetStkSizeBytes() const
+ {
+ return stkSizeBytes;
+ }
+ bool IsStkAlignmentDone() const
+ {
+ return alignmentDone;
+ }
+ void SetStkAlignmentDone()
+ {
+ alignmentDone = true;
+ }
+#endif // defined(UNIX_X86_ABI)
// Get the late arg for arg at position argIndex. Caller must ensure this position has a late arg.
GenTreePtr GetLateArg(unsigned argIndex);
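
ComputeStackAlignment above leans on AlignmentPad to turn the current stack level into the number of padding bytes needed to reach STACK_ALIGN. The helper name is taken from the call site; the body below is an assumed implementation of the usual round-up arithmetic:

// Assumed behavior of AlignmentPad: bytes needed to round 'size' up to 'alignment'.
// Example: size = 20, alignment = 16  ->  pad = 12 (20 + 12 == 32).
unsigned AlignmentPad(unsigned size, unsigned alignment)
{
    unsigned remainder = size % alignment;
    return (remainder == 0) ? 0 : (alignment - remainder);
}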
@@ -2021,9 +2072,9 @@ public:
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);
- static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
- static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
- fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
+ static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreeCall* call, unsigned argNum);
+ static fgArgTabEntryPtr gtArgEntryByNode(GenTreeCall* call, GenTreePtr node);
+ fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreeCall* call, unsigned lateArgInx);
bool gtArgIsThisPtr(fgArgTabEntryPtr argEntry);
GenTreePtr gtNewAssignNode(GenTreePtr dst, GenTreePtr src);
@@ -2129,7 +2180,7 @@ public:
unsigned flags = GTF_SIDE_EFFECT,
bool ignoreRoot = false);
- GenTreePtr gtGetThisArg(GenTreePtr call);
+ GenTreePtr gtGetThisArg(GenTreeCall* call);
// Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
// static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but
@@ -2138,9 +2189,10 @@ public:
bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
// Return true if call is a recursive call; return false otherwise.
+ // Note when inlining, this looks for calls back to the root method.
bool gtIsRecursiveCall(GenTreeCall* call)
{
- return (call->gtCallMethHnd == info.compMethodHnd);
+ return (call->gtCallMethHnd == impInlineRoot()->info.compMethodHnd);
}
//-------------------------------------------------------------------------
@@ -2166,6 +2218,8 @@ public:
CORINFO_CLASS_HANDLE gtGetStructHandleIfPresent(GenTreePtr tree);
// Get the handle, and assert if not found.
CORINFO_CLASS_HANDLE gtGetStructHandle(GenTreePtr tree);
+ // Get the handle for a ref type.
+ CORINFO_CLASS_HANDLE gtGetClassHandle(GenTreePtr tree, bool* isExact, bool* isNonNull);
//-------------------------------------------------------------------------
// Functions to display the trees
@@ -2204,16 +2258,16 @@ public:
char* gtGetLclVarName(unsigned lclNum);
void gtDispLclVar(unsigned varNum, bool padForBiggestDisp = true);
void gtDispTreeList(GenTreePtr tree, IndentStack* indentStack = nullptr);
- void gtGetArgMsg(GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength);
- void gtGetLateArgMsg(GenTreePtr call, GenTreePtr arg, int argNum, int listCount, char* bufp, unsigned bufLength);
- void gtDispArgList(GenTreePtr tree, IndentStack* indentStack);
+ void gtGetArgMsg(GenTreeCall* call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtGetLateArgMsg(GenTreeCall* call, GenTreePtr arg, int argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtDispArgList(GenTreeCall* call, IndentStack* indentStack);
void gtDispFieldSeq(FieldSeqNode* pfsn);
void gtDispRange(LIR::ReadOnlyRange const& range);
void gtDispTreeRange(LIR::Range& containingRange, GenTree* tree);
- void gtDispLIRNode(GenTree* node);
+ void gtDispLIRNode(GenTree* node, const char* prefixMsg = nullptr);
#endif
// For tree walks
@@ -2399,9 +2453,9 @@ public:
// in case there are multiple BBJ_RETURN blocks in the inlinee.
#if FEATURE_FIXED_OUT_ARGS
- unsigned lvaOutgoingArgSpaceVar; // dummy TYP_LCLBLK var for fixed outgoing argument space
- unsigned lvaOutgoingArgSpaceSize; // size of fixed outgoing argument space
-#endif // FEATURE_FIXED_OUT_ARGS
+ unsigned lvaOutgoingArgSpaceVar; // dummy TYP_LCLBLK var for fixed outgoing argument space
+ PhasedVar<unsigned> lvaOutgoingArgSpaceSize; // size of fixed outgoing argument space
+#endif // FEATURE_FIXED_OUT_ARGS
#ifdef _TARGET_ARM_
// On architectures whose ABIs allow structs to be passed in registers, struct promotion will sometimes
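
lvaOutgoingArgSpaceSize above becomes a PhasedVar<unsigned>, i.e. a value that is written while the frame layout is still being decided and then treated as read-only. The sketch below illustrates that write-then-seal idea with assumed semantics and a different name; the real PhasedVar API may differ.

#include <cassert>

// Illustrative write-then-seal wrapper (assumed semantics, not the JIT's PhasedVar).
template <typename T>
class WriteThenReadVar
{
    T    m_value{};
    bool m_sealed = false;

public:
    WriteThenReadVar& operator=(const T& value)
    {
        assert(!m_sealed); // no more writes once the value has been finalized
        m_value = value;
        return *this;
    }

    void Seal()
    {
        m_sealed = true;   // the owning phase calls this when the value is final
    }

    operator T() const
    {
        assert(m_sealed);  // readers must wait until the value is final
        return m_value;
    }
};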
@@ -2417,7 +2471,7 @@ public:
unsigned lvaCallEspCheck; // confirms ESP not corrupted after a call
#endif
- bool lvaGenericsContextUsed;
+ unsigned lvaGenericsContextUseCount;
bool lvaKeepAliveAndReportThis(); // Synchronized instance method of a reference type, or
// CORINFO_GENERICS_CTXT_FROM_THIS?
@@ -2564,7 +2618,7 @@ public:
void lvaMarkLocalVars(); // Local variable ref-counting
- void lvaAllocOutgoingArgSpace(); // 'Commit' lvaOutgoingArgSpaceSize and lvaOutgoingArgSpaceVar
+ void lvaAllocOutgoingArgSpaceVar(); // Set up lvaOutgoingArgSpaceVar
VARSET_VALRET_TP lvaStmtLclMask(GenTreePtr stmt);
@@ -2624,11 +2678,16 @@ public:
// Returns true if this local var is a multireg struct
bool lvaIsMultiregStruct(LclVarDsc* varDsc);
- // If the class is a TYP_STRUCT, get/set a class handle describing it
-
+ // If the local is a TYP_STRUCT, get/set a class handle describing it
CORINFO_CLASS_HANDLE lvaGetStruct(unsigned varNum);
void lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck, bool setTypeInfo = true);
+ // If the local is TYP_REF, set or update the associated class information.
+ void lvaSetClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact = false);
+ void lvaSetClass(unsigned varNum, GenTreePtr tree, CORINFO_CLASS_HANDLE stackHandle = nullptr);
+ void lvaUpdateClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact = false);
+ void lvaUpdateClass(unsigned varNum, GenTreePtr tree, CORINFO_CLASS_HANDLE stackHandle = nullptr);
+
#define MAX_NumOfFieldsInPromotableStruct 4 // Maximum number of fields in promotable struct
// Info about struct fields
@@ -2664,6 +2723,7 @@ public:
lvaStructPromotionInfo* StructPromotionInfo,
bool sortFields);
void lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
+ bool lvaShouldPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* structPromotionInfo);
void lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
#if !defined(_TARGET_64BIT_)
void lvaPromoteLongVars();
@@ -2749,6 +2809,9 @@ protected:
static fgWalkPreFn lvaMarkLclRefsCallback;
void lvaMarkLclRefs(GenTreePtr tree);
+ bool IsDominatedByExceptionalEntry(BasicBlock* block);
+ void SetVolatileHint(LclVarDsc* varDsc);
+
// Keeps the mapping from SSA #'s to VN's for the implicit memory variables.
PerSsaArray lvMemoryPerSsaData;
unsigned lvMemoryNumSsaNames;
@@ -2820,6 +2883,7 @@ protected:
StackEntry impPopStack(CORINFO_CLASS_HANDLE& structTypeRet);
GenTreePtr impPopStack(typeInfo& ti);
StackEntry& impStackTop(unsigned n = 0);
+ unsigned impStackHeight();
void impSaveStackState(SavedStack* savePtr, bool copy);
void impRestoreStackState(SavedStack* savePtr);
@@ -2835,18 +2899,14 @@ protected:
bool impCanPInvokeInline();
bool impCanPInvokeInlineCallSite(BasicBlock* block);
void impCheckForPInvokeCall(
- GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
- GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+ GenTreeCall* call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
+ GenTreeCall* impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
void impInsertHelperCall(CORINFO_HELPER_DESC* helperCall);
void impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
void impHandleAccessAllowedInternal(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
- void impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
- CORINFO_METHOD_HANDLE calleeMethodHnd,
- CORINFO_CLASS_HANDLE delegateTypeHnd);
-
var_types impImportCall(OPCODE opcode,
CORINFO_RESOLVED_TOKEN* pResolvedToken,
CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call on a
@@ -2856,9 +2916,14 @@ protected:
CORINFO_CALL_INFO* callInfo,
IL_OFFSET rawILOffset);
+ void impDevirtualizeCall(GenTreeCall* call,
+ GenTreePtr obj,
+ CORINFO_CALL_INFO* callInfo,
+ CORINFO_CONTEXT_HANDLE* exactContextHnd);
+
bool impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO* methInfo);
- GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);
+ GenTreePtr impFixupCallStructReturn(GenTreeCall* call, CORINFO_CLASS_HANDLE retClsHnd);
GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);
@@ -2995,11 +3060,11 @@ public:
GenTreePtr impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup, unsigned flags, void* compileTimeHandle);
- GenTreePtr impReadyToRunHelperToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
- CorInfoHelpFunc helper,
- var_types type,
- GenTreeArgList* arg = nullptr,
- CORINFO_LOOKUP_KIND* pGenericLookupKind = nullptr);
+ GenTreeCall* impReadyToRunHelperToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CorInfoHelpFunc helper,
+ var_types type,
+ GenTreeArgList* arg = nullptr,
+ CORINFO_LOOKUP_KIND* pGenericLookupKind = nullptr);
GenTreePtr impCastClassOrIsInstToTree(GenTreePtr op1,
GenTreePtr op2,
@@ -3072,6 +3137,11 @@ private:
//---------------- Spilling the importer stack ----------------------------
+ // The maximum number of bytes of IL processed without clean stack state.
+ // It allows us to limit the maximum tree size and depth.
+ static const unsigned MAX_TREE_SIZE = 200;
+ bool impCanSpillNow(OPCODE prevOpcode);
+
struct PendingDsc
{
PendingDsc* pdNext;
@@ -3303,7 +3373,10 @@ private:
GenTreePtr variableBeingDereferenced,
InlArgInfo* inlArgInfo);
- void impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_HANDLE exactContextHnd, CORINFO_CALL_INFO* callInfo);
+ void impMarkInlineCandidate(GenTreePtr call,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ bool exactContextNeedsRuntimeLookup,
+ CORINFO_CALL_INFO* callInfo);
bool impTailCallRetTypeCompatible(var_types callerRetType,
CORINFO_CLASS_HANDLE callerRetTypeClass,
@@ -3320,6 +3393,8 @@ private:
bool impIsImplicitTailCallCandidate(
OPCODE curOpcode, const BYTE* codeAddrOfNextOpcode, const BYTE* codeEnd, int prefixFlags, bool isRecursive);
+ CORINFO_RESOLVED_TOKEN* impAllocateToken(CORINFO_RESOLVED_TOKEN token);
+
/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -3470,6 +3545,8 @@ public:
void fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk);
void fgUnlinkBlock(BasicBlock* block);
+ unsigned fgMeasureIR();
+
#if OPT_BOOL_OPS // Used to detect multiple logical "not" assignments.
bool fgMultipleNots;
#endif
@@ -3517,7 +3594,7 @@ public:
bool fgSlopUsedInEdgeWeights; // true if there was some slop used when computing the edge weights
bool fgRangeUsedInEdgeWeights; // true if some of the edgeWeight are expressed in Min..Max form
bool fgNeedsUpdateFlowGraph; // true if we need to run fgUpdateFlowGraph
- BasicBlock::weight_t fgCalledWeight; // count of the number of times this method was called
+ BasicBlock::weight_t fgCalledCount; // count of the number of times this method was called
// This is derived from the profile data
// or is BB_UNITY_WEIGHT when we don't have profile data
@@ -3555,15 +3632,21 @@ public:
void fgRemoveEmptyFinally();
+ void fgMergeFinallyChains();
+
void fgCloneFinally();
void fgCleanupContinuation(BasicBlock* continuation);
void fgUpdateFinallyTargetFlags();
+ bool fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block,
+ BasicBlock* handler,
+ BlockToBlockMap& continuationMap);
+
GenTreePtr fgGetCritSectOfStaticMethod();
-#if !defined(_TARGET_X86_)
+#if FEATURE_EH_FUNCLETS
void fgAddSyncMethodEnterExit();
@@ -3571,7 +3654,7 @@ public:
void fgConvertSyncReturnToLeave(BasicBlock* block);
-#endif // !_TARGET_X86_
+#endif // FEATURE_EH_FUNCLETS
void fgAddReversePInvokeEnterExit();
@@ -3627,9 +3710,9 @@ public:
GenTreePtr fgInitThisClass();
- GenTreePtr fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper);
+ GenTreeCall* fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper);
- GenTreePtr fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls);
+ GenTreeCall* fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls);
void fgLocalVarLiveness();
@@ -4010,6 +4093,8 @@ protected:
// Based on: A Simple, Fast Dominance Algorithm
// by Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
+ void fgCompDominatedByExceptionalEntryBlocks();
+
BlockSet_ValRet_T fgGetDominatorSet(BasicBlock* block); // Returns a set of blocks that dominate the given block.
// Note: this is relatively slow compared to calling fgDominate(),
// especially if dealing with a single block versus block check.
@@ -4487,12 +4572,22 @@ protected:
bool fgHaveProfileData();
bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
+ void fgInstrumentMethod();
+public:
+ // fgIsUsingProfileWeights - returns true if we have real profile data for this method
+ // or if we have some fake profile data for the stress mode
bool fgIsUsingProfileWeights()
{
return (fgHaveProfileData() || fgStressBBProf());
}
- void fgInstrumentMethod();
+
+ // fgProfileRunsCount - returns total number of scenario runs for the profile data
+ // or BB_UNITY_WEIGHT when we aren't using profile data.
+ unsigned fgProfileRunsCount()
+ {
+ return fgIsUsingProfileWeights() ? fgNumProfileRuns : BB_UNITY_WEIGHT;
+ }
//-------- Insert a statement at the start or end of a basic block --------
@@ -4647,7 +4742,9 @@ private:
void fgNoteNonInlineCandidate(GenTreeStmt* stmt, GenTreeCall* call);
static fgWalkPreFn fgFindNonInlineCandidate;
#endif
- GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
+ GenTreePtr fgOptimizeDelegateConstructor(GenTreeCall* call,
+ CORINFO_CONTEXT_HANDLE* ExactContextHnd,
+ CORINFO_RESOLVED_TOKEN* ldftnToken);
GenTreePtr fgMorphLeaf(GenTreePtr tree);
void fgAssignSetVarDef(GenTreePtr tree);
GenTreePtr fgMorphOneAsgBlockOp(GenTreePtr tree);
@@ -4789,7 +4886,7 @@ private:
static fgWalkPreFn gtHasLocalsWithAddrOpCB;
bool gtCanOptimizeTypeEquality(GenTreePtr tree);
- bool gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree);
+ bool gtIsTypeHandleToRuntimeTypeHelper(GenTreeCall* call);
bool gtIsActiveCSE_Candidate(GenTreePtr tree);
#ifdef DEBUG
@@ -5343,7 +5440,6 @@ protected:
// Keeps tracked cse indices
BitVecTraits* cseTraits;
EXPSET_TP cseFull;
- EXPSET_TP cseEmpty;
/* Generic list of nodes - used by the CSE logic */
@@ -5397,6 +5493,14 @@ protected:
CSEdsc** optCSEhash;
CSEdsc** optCSEtab;
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, GenTreePtr, JitSimplerHashBehavior> NodeToNodeMap;
+
+ NodeToNodeMap* optCseArrLenMap; // Maps array length nodes to ancestor compares that should be
+ // re-numbered with the array length to improve range check elimination
+
+ // Given a compare, look for a cse candidate arrlen feeding it and add a map entry if found.
+ void optCseUpdateArrLenMap(GenTreePtr compare);
+
void optCSEstop();
CSEdsc* optCSEfindDsc(unsigned index);
@@ -5504,7 +5608,7 @@ protected:
callInterf ivaMaskCall; // What kind of calls are there?
};
- static callInterf optCallInterf(GenTreePtr call);
+ static callInterf optCallInterf(GenTreeCall* call);
public:
// VN based copy propagation.
@@ -5568,6 +5672,12 @@ public:
optMethodFlags &= ~OMF_HAS_FATPOINTER;
}
+ void addFatPointerCandidate(GenTreeCall* call)
+ {
+ setMethodHasFatPointer();
+ call->SetFatPointerCandidate();
+ }
+
unsigned optMethodFlags;
// Recursion bound controls how far we can go backwards tracking for a SSA value.
@@ -5602,7 +5712,6 @@ public:
// Data structures for assertion prop
BitVecTraits* apTraits;
ASSERT_TP apFull;
- ASSERT_TP apEmpty;
enum optAssertionKind
{
@@ -5773,8 +5882,20 @@ public:
bool HasSameOp1(AssertionDsc* that, bool vnBased)
{
- return (op1.kind == that->op1.kind) &&
- ((vnBased && (op1.vn == that->op1.vn)) || (!vnBased && (op1.lcl.lclNum == that->op1.lcl.lclNum)));
+ if (op1.kind != that->op1.kind)
+ {
+ return false;
+ }
+ else if (op1.kind == O1K_ARR_BND)
+ {
+ assert(vnBased);
+ return (op1.bnd.vnIdx == that->op1.bnd.vnIdx) && (op1.bnd.vnLen == that->op1.bnd.vnLen);
+ }
+ else
+ {
+ return ((vnBased && (op1.vn == that->op1.vn)) ||
+ (!vnBased && (op1.lcl.lclNum == that->op1.lcl.lclNum)));
+ }
}
bool HasSameOp2(AssertionDsc* that, bool vnBased)
@@ -5823,12 +5944,22 @@ public:
bool Equals(AssertionDsc* that, bool vnBased)
{
- return (assertionKind == that->assertionKind) && HasSameOp1(that, vnBased) && HasSameOp2(that, vnBased);
+ if (assertionKind != that->assertionKind)
+ {
+ return false;
+ }
+ else if (assertionKind == OAK_NO_THROW)
+ {
+ assert(op2.kind == O2K_INVALID);
+ return HasSameOp1(that, vnBased);
+ }
+ else
+ {
+ return HasSameOp1(that, vnBased) && HasSameOp2(that, vnBased);
+ }
}
};
- typedef unsigned short AssertionIndex;
-
protected:
static fgWalkPreFn optAddCopiesCallback;
static fgWalkPreFn optVNAssertionPropCurStmtVisitor;
@@ -5865,8 +5996,6 @@ public:
ValueNumToAssertsMap;
ValueNumToAssertsMap* optValueNumToAsserts;
- static const AssertionIndex NO_ASSERTION_INDEX = 0;
-
// Assertion prop helpers.
ASSERT_TP& GetAssertionDep(unsigned lclNum);
AssertionDsc* optGetAssertion(AssertionIndex assertIndex);
@@ -5887,8 +6016,8 @@ public:
// Assertion Gen functions.
void optAssertionGen(GenTreePtr tree);
AssertionIndex optAssertionGenPhiDefn(GenTreePtr tree);
- AssertionIndex optCreateJTrueBoundsAssertion(GenTreePtr tree);
- AssertionIndex optAssertionGenJtrue(GenTreePtr tree);
+ AssertionInfo optCreateJTrueBoundsAssertion(GenTreePtr tree);
+ AssertionInfo optAssertionGenJtrue(GenTreePtr tree);
AssertionIndex optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind);
AssertionIndex optFindComplementary(AssertionIndex assertionIndex);
void optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index);
@@ -5936,14 +6065,14 @@ public:
GenTreePtr optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
- GenTreePtr optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, const GenTreePtr stmt);
GenTreePtr optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
GenTreePtr optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt);
- GenTreePtr optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, GenTreeCall* call, const GenTreePtr stmt);
// Implied assertion functions.
void optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions);
@@ -5951,9 +6080,6 @@ public:
void optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result);
void optImpliedByConstAssertion(AssertionDsc* curAssertion, ASSERT_TP& result);
- ASSERT_VALRET_TP optNewFullAssertSet();
- ASSERT_VALRET_TP optNewEmptyAssertSet();
-
#ifdef DEBUG
void optPrintAssertion(AssertionDsc* newAssertion, AssertionIndex assertionIndex = 0);
void optDebugCheckAssertion(AssertionDsc* assertion);
@@ -6469,11 +6595,7 @@ public:
// Returns the page size for the target machine as reported by the EE.
inline size_t eeGetPageSize()
{
-#if COR_JIT_EE_VERSION > 460
return eeGetEEInfo()->osPageSize;
-#else // COR_JIT_EE_VERSION <= 460
- return CORINFO_PAGE_SIZE;
-#endif // COR_JIT_EE_VERSION > 460
}
// Returns the frame size at which we will generate a loop to probe the stack.
@@ -6491,11 +6613,7 @@ public:
inline bool IsTargetAbi(CORINFO_RUNTIME_ABI abi)
{
-#if COR_JIT_EE_VERSION > 460
return eeGetEEInfo()->targetAbi == abi;
-#else
- return CORINFO_DESKTOP_ABI == abi;
-#endif
}
inline bool generateCFIUnwindCodes()
@@ -7690,28 +7808,20 @@ public:
// PInvoke transitions inline (e.g. when targeting CoreRT).
inline bool ShouldUsePInvokeHelpers()
{
-#if COR_JIT_EE_VERSION > 460
return jitFlags->IsSet(JitFlags::JIT_FLAG_USE_PINVOKE_HELPERS);
-#else
- return false;
-#endif
}
// true if we should use insert the REVERSE_PINVOKE_{ENTER,EXIT} helpers in the method
// prolog/epilog
inline bool IsReversePInvoke()
{
-#if COR_JIT_EE_VERSION > 460
return jitFlags->IsSet(JitFlags::JIT_FLAG_REVERSE_PINVOKE);
-#else
- return false;
-#endif
}
// true if we must generate code compatible with JIT32 quirks
inline bool IsJit32Compat()
{
-#if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
+#if defined(_TARGET_X86_)
return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
#else
return false;
@@ -7721,9 +7831,9 @@ public:
// true if we must generate code compatible with Jit64 quirks
inline bool IsJit64Compat()
{
-#if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
+#if defined(_TARGET_AMD64_)
return jitFlags->IsSet(JitFlags::JIT_FLAG_DESKTOP_QUIRKS);
-#elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
+#elif !defined(FEATURE_CORECLR)
return true;
#else
return false;
@@ -7760,13 +7870,11 @@ public:
// (or)
// 3. When opts.compDbgEnC is true. (See also Compiler::compCompile).
//
-// When this flag is set, jit will allocate a gc-reference local variable (lvaSecurityObject),
-// which gets reported as a GC root to stackwalker.
-// (See also ICodeManager::GetAddrOfSecurityObject.)
+ // When this flag is set, jit will allocate a gc-reference local variable (lvaSecurityObject),
+ // which gets reported as a GC root to stackwalker.
+ // (See also ICodeManager::GetAddrOfSecurityObject.)
-#if RELOC_SUPPORT
bool compReloc;
-#endif
#ifdef DEBUG
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
@@ -7790,8 +7898,11 @@ public:
bool genFPopt; // Can we do frame-pointer-omission optimization?
bool altJit; // True if we are an altjit and are compiling this method
+#ifdef OPT_CONFIG
+ bool optRepeat; // Repeat optimizer phases k times
+#endif
+
#ifdef DEBUG
- bool optRepeat; // Repeat optimizer phases k times
bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
bool dspCode; // Display native code generated
bool dspEHTable; // Display the EH table reported to the VM
@@ -9371,6 +9482,10 @@ const instruction INS_ADDC = INS_adc;
const instruction INS_SUBC = INS_sbc;
const instruction INS_NOT = INS_mvn;
+const instruction INS_ABS = INS_vabs;
+const instruction INS_ROUND = INS_invalid;
+const instruction INS_SQRT = INS_vsqrt;
+
#endif
#ifdef _TARGET_ARM64_
@@ -9392,6 +9507,10 @@ const instruction INS_ADDC = INS_adc;
const instruction INS_SUBC = INS_sbc;
const instruction INS_NOT = INS_mvn;
+const instruction INS_ABS = INS_fabs;
+const instruction INS_ROUND = INS_frintn;
+const instruction INS_SQRT = INS_fsqrt;
+
#endif
/*****************************************************************************/
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index 6baf601892..88c082d499 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -2199,11 +2199,14 @@ inline bool Compiler::lvaKeepAliveAndReportThis()
return false;
}
+ const bool genericsContextIsThis = (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0;
+
#ifdef JIT32_GCENCODER
+
if (info.compFlags & CORINFO_FLG_SYNCH)
return true;
- if (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS)
+ if (genericsContextIsThis)
{
// TODO: Check if any of the exception clauses are
// typed using a generic type. Else, we do not need to report this.
@@ -2213,18 +2216,29 @@ inline bool Compiler::lvaKeepAliveAndReportThis()
if (opts.compDbgCode)
return true;
- if (lvaGenericsContextUsed)
+ if (lvaGenericsContextUseCount > 0)
+ {
+ JITDUMP("Reporting this as generic context: %u refs\n", lvaGenericsContextUseCount);
return true;
+ }
}
#else // !JIT32_GCENCODER
// If the generics context is the this pointer we need to report it if either
// the VM requires us to keep the generics context alive or it is used in a look-up.
- // We keep it alive in the lookup scenario, even when the VM didn't ask us too
+ // We keep it alive in the lookup scenario, even when the VM didn't ask us to,
// because collectible types need the generics context when gc-ing.
- if ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) &&
- (lvaGenericsContextUsed || (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE)))
+ if (genericsContextIsThis)
{
- return true;
+ const bool isUsed = lvaGenericsContextUseCount > 0;
+ const bool mustKeep = (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE) != 0;
+
+ if (isUsed || mustKeep)
+ {
+ JITDUMP("Reporting this as generic context: %u refs%s\n", lvaGenericsContextUseCount,
+ mustKeep ? ", must keep" : "");
+
+ return true;
+ }
}
#endif
@@ -2250,7 +2264,7 @@ inline bool Compiler::lvaReportParamTypeArg()
// Otherwise, if an exact type parameter is needed in the body, report the generics context.
// We do this because collectible types needs the generics context when gc-ing.
- if (lvaGenericsContextUsed)
+ if (lvaGenericsContextUseCount > 0)
{
return true;
}
@@ -2321,15 +2335,16 @@ inline
// On amd64, every param has a stack location, except on Unix-like systems.
assert(varDsc->lvIsParam);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- // For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location.
- assert(varDsc->lvIsParam && !varDsc->lvIsRegArg);
-#else // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+#elif !defined(LEGACY_BACKEND)
+ // For !LEGACY_BACKEND on other targets, a stack parameter that is enregistered or prespilled
+ // for profiling on ARM will have a stack location.
+ assert((varDsc->lvIsParam && !varDsc->lvIsRegArg) || isPrespilledArg);
+#else // !(_TARGET_AMD64 || !defined(LEGACY_BACKEND))
// Otherwise, we only have a valid stack location for:
// A parameter that was passed on the stack, being homed into its register home,
// or a prespilled argument on arm under profiler.
assert((varDsc->lvIsParam && !varDsc->lvIsRegArg && varDsc->lvRegister) || isPrespilledArg);
-#endif // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+#endif // !(_TARGET_AMD64 || !defined(LEGACY_BACKEND))
}
FPbased = varDsc->lvFramePointerBased;
@@ -2516,10 +2531,10 @@ inline BOOL Compiler::lvaIsOriginalThisArg(unsigned varNum)
// copy to a new local, and mark the original as DoNotEnregister, to
// ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it
// should not be written to, or address-exposed.
- assert(!varDsc->lvArgWrite &&
+ assert(!varDsc->lvHasILStoreOp &&
(!varDsc->lvAddrExposed || ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)));
#else
- assert(!varDsc->lvArgWrite && !varDsc->lvAddrExposed);
+ assert(!varDsc->lvHasILStoreOp && !varDsc->lvAddrExposed);
#endif
}
#endif
@@ -2877,9 +2892,7 @@ inline bool Compiler::fgIsThrowHlpBlk(BasicBlock* block)
if (!((call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
(call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
-#if COR_JIT_EE_VERSION > 460
(call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
-#endif // COR_JIT_EE_VERSION
(call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW))))
{
return false;
@@ -2893,11 +2906,8 @@ inline bool Compiler::fgIsThrowHlpBlk(BasicBlock* block)
{
if (block == add->acdDstBlk)
{
- return add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
-#if COR_JIT_EE_VERSION > 460
- || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
-#endif // COR_JIT_EE_VERSION
- ;
+ return add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW ||
+ add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN;
}
}
@@ -2919,11 +2929,8 @@ inline unsigned Compiler::fgThrowHlpBlkStkLevel(BasicBlock* block)
{
// Compute assert cond separately as assert macro cannot have conditional compilation directives.
bool cond =
- (add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
-#if COR_JIT_EE_VERSION > 460
- || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
-#endif // COR_JIT_EE_VERSION
- );
+ (add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW ||
+ add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN);
assert(cond);
// TODO: bbTgtStkDepth is DEBUG-only.
@@ -4450,7 +4457,7 @@ inline void Compiler::EndPhase(Phases phase)
#if defined(FEATURE_JIT_METHOD_PERF)
if (pCompJitTimer != nullptr)
{
- pCompJitTimer->EndPhase(phase);
+ pCompJitTimer->EndPhase(this, phase);
}
#endif
#if DUMP_FLOWGRAPHS
diff --git a/src/jit/compilerbitsettraits.h b/src/jit/compilerbitsettraits.h
index 4365c518d7..d0436f4052 100644
--- a/src/jit/compilerbitsettraits.h
+++ b/src/jit/compilerbitsettraits.h
@@ -22,10 +22,10 @@
class CompAllocBitSetTraits
{
public:
- static inline IAllocator* GetAllocator(class Compiler* comp);
+ static inline void* Alloc(Compiler* comp, size_t byteSize);
#ifdef DEBUG
- static inline IAllocator* GetDebugOnlyAllocator(class Compiler* comp);
+ static inline void* DebugAlloc(Compiler* comp, size_t byteSize);
#endif // DEBUG
};
@@ -112,10 +112,10 @@ public:
{
}
- static inline IAllocator* GetAllocator(BitVecTraits* b);
+ static inline void* Alloc(BitVecTraits* b, size_t byteSize);
#ifdef DEBUG
- static inline IAllocator* GetDebugOnlyAllocator(BitVecTraits* b);
+ static inline void* DebugAlloc(BitVecTraits* b, size_t byteSize);
#endif // DEBUG
static inline unsigned GetSize(BitVecTraits* b);
diff --git a/src/jit/compilerbitsettraits.hpp b/src/jit/compilerbitsettraits.hpp
index e2ba2f8a7a..be30564701 100644
--- a/src/jit/compilerbitsettraits.hpp
+++ b/src/jit/compilerbitsettraits.hpp
@@ -15,16 +15,16 @@
///////////////////////////////////////////////////////////////////////////////
// static
-IAllocator* CompAllocBitSetTraits::GetAllocator(Compiler* comp)
+void* CompAllocBitSetTraits::Alloc(Compiler* comp, size_t byteSize)
{
- return comp->getAllocatorBitset();
+ return comp->compGetMem(byteSize, CMK_bitset);
}
#ifdef DEBUG
// static
-IAllocator* CompAllocBitSetTraits::GetDebugOnlyAllocator(Compiler* comp)
+void* CompAllocBitSetTraits::DebugAlloc(Compiler* comp, size_t byteSize)
{
- return comp->getAllocatorDebugOnly();
+ return comp->compGetMem(byteSize, CMK_DebugOnly);
}
#endif // DEBUG
@@ -139,16 +139,16 @@ BitSetSupport::BitSetOpCounter* BasicBlockBitSetTraits::GetOpCounter(Compiler* c
///////////////////////////////////////////////////////////////////////////////
// static
-IAllocator* BitVecTraits::GetAllocator(BitVecTraits* b)
+void* BitVecTraits::Alloc(BitVecTraits* b, size_t byteSize)
{
- return b->comp->getAllocatorBitset();
+ return b->comp->compGetMem(byteSize, CMK_bitset);
}
#ifdef DEBUG
// static
-IAllocator* BitVecTraits::GetDebugOnlyAllocator(BitVecTraits* b)
+void* BitVecTraits::DebugAlloc(BitVecTraits* b, size_t byteSize)
{
- return b->comp->getAllocatorDebugOnly();
+ return b->comp->compGetMem(byteSize, CMK_DebugOnly);
}
#endif // DEBUG
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
index 5038d6e9c9..e4dfedd499 100644
--- a/src/jit/compphases.h
+++ b/src/jit/compphases.h
@@ -11,95 +11,98 @@
// corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro
// after the last use.
// The arguments are:
-// CompPhaseNameMacro(enumName, stringName, shortName, hasChildren, parent)
+// CompPhaseNameMacro(enumName, stringName, shortName, hasChildren, parent, measureIR)
// "enumName" is an Enumeration-style all-caps name.
// "stringName" is a self-explanatory.
// "shortName" is an abbreviated form for stringName
// "hasChildren" is true if this phase is broken out into subphases.
// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
+// "measureIR" is true for phases that generate a count of IR nodes during EndPhase when JitConfig.MeasureIR is
+// true.
// clang-format off
-CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
-CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
-CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
-CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1)
-CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1)
-CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1)
-CompPhaseNameMacro(PHASE_EMPTY_TRY, "Remove empty try", "EMPTYTRY", false, -1)
-CompPhaseNameMacro(PHASE_EMPTY_FINALLY, "Remove empty finally", "EMPTYFIN", false, -1)
-CompPhaseNameMacro(PHASE_CLONE_FINALLY, "Clone finally", "CLONEFIN", false, -1)
-CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1)
-CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1)
-CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1)
-CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1)
-CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1)
-CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1)
-CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", "EDG-WGT", false, -1)
+CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1, false)
+CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1, true)
+CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1, false)
+CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1, false)
+CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1, true)
+CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1, false)
+CompPhaseNameMacro(PHASE_EMPTY_TRY, "Remove empty try", "EMPTYTRY", false, -1, false)
+CompPhaseNameMacro(PHASE_EMPTY_FINALLY, "Remove empty finally", "EMPTYFIN", false, -1, false)
+CompPhaseNameMacro(PHASE_MERGE_FINALLY_CHAINS, "Merge callfinally chains", "MRGCFCHN", false, -1, false)
+CompPhaseNameMacro(PHASE_CLONE_FINALLY, "Clone finally", "CLONEFIN", false, -1, false)
+CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1, false)
+CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1, false)
+CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1, true)
+CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1, false)
+CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1, false)
+CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1, false)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", "EDG-WGT", false, -1, false)
#if FEATURE_EH_FUNCLETS
-CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1)
+CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1, false)
#endif // FEATURE_EH_FUNCLETS
-CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1)
-CompPhaseNameMacro(PHASE_ALLOCATE_OBJECTS, "Allocate Objects", "ALLOC-OBJ",false, -1)
-CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1)
-CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1)
-CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1)
-CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1)
-CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1)
-CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1)
-CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1)
-CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1)
-CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", "SSA", true, -1)
-CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", "SSA-SORT", false, PHASE_BUILD_SSA)
-CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", "SSA-DOMS", false, PHASE_BUILD_SSA)
-CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", "SSA-LIVE", false, PHASE_BUILD_SSA)
-CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", "SSA-IDF", false, PHASE_BUILD_SSA)
-CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", "SSA-PHI", false, PHASE_BUILD_SSA)
-CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", "SSA-REN", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false)
+CompPhaseNameMacro(PHASE_ALLOCATE_OBJECTS, "Allocate Objects", "ALLOC-OBJ",false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1, false)
+CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false)
+CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false)
+CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1, false)
+CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1, false)
+CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1, false)
+CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1, false)
+CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1, true)
+CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", "SSA", true, -1, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", "SSA-SORT", false, PHASE_BUILD_SSA, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", "SSA-DOMS", false, PHASE_BUILD_SSA, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", "SSA-LIVE", false, PHASE_BUILD_SSA, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", "SSA-IDF", false, PHASE_BUILD_SSA, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", "SSA-PHI", false, PHASE_BUILD_SSA, false)
+CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", "SSA-REN", false, PHASE_BUILD_SSA, false)
-CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", "ERL-PROP", false, -1)
-CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", "VAL-NUM", false, -1)
+CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", "ERL-PROP", false, -1, false)
+CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", "VAL-NUM", false, -1, false)
-CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", "OPT-CHK", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", "OPT-CHK", false, -1, false)
#if FEATURE_VALNUM_CSE
-CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1, false)
#endif
-CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1)
+CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1, false)
#if ASSERTION_PROP
-CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", "AST-PROP", false, -1)
+CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", "AST-PROP", false, -1, false)
#endif
-CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", "UPD-FG", false, -1)
-CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", "EDG-WGT2", false, -1)
-CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1)
-CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1)
-CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1)
+CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", "UPD-FG", false, -1, false)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", "EDG-WGT2", false, -1, false)
+CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1, true)
+CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1, false)
+CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1, false)
-CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1)
-CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS)
-CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK,"Per block local var liveness", "LIV-BLK", false, PHASE_LCLVARLIVENESS)
-CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var liveness", "LIV-GLBL", false, PHASE_LCLVARLIVENESS)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1, false)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS, false)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK,"Per block local var liveness", "LIV-BLK", false, PHASE_LCLVARLIVENESS, false)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var liveness", "LIV-GLBL", false, PHASE_LCLVARLIVENESS, false)
#ifdef LEGACY_BACKEND
-CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", "REGALLOC", false, -1)
+CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", "REGALLOC", false, -1, false)
#endif // LEGACY_BACKEND
-CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", "LWR-DEC", false, -1)
-CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", "LWR-INFO", false, -1)
+CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", "LWR-DEC", false, -1, false)
+CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", "LWR-INFO", false, -1, true)
#ifndef LEGACY_BACKEND
-CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", "LSRA", true, -1)
-CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", "LSRA-BLD", false, PHASE_LINEAR_SCAN)
-CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", "LSRA-ALL", false, PHASE_LINEAR_SCAN)
-CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", "LSRA-RES", false, PHASE_LINEAR_SCAN)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", "LSRA", true, -1, true)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", "LSRA-BLD", false, PHASE_LINEAR_SCAN, false)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", "LSRA-ALL", false, PHASE_LINEAR_SCAN, false)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", "LSRA-RES", false, PHASE_LINEAR_SCAN, false)
#endif // !LEGACY_BACKEND
-CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
-CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
-CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1, false)
+CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1, false)
+CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1, false)
#if MEASURE_CLRAPI_CALLS
// The following is a "pseudo-phase" - it aggregates timing info
// for calls through ICorJitInfo across all "real" phases.
-CompPhaseNameMacro(PHASE_CLR_API, "CLR API calls", "CLR-API", false, -1)
+CompPhaseNameMacro(PHASE_CLR_API, "CLR API calls", "CLR-API", false, -1, false)
#endif
// clang-format on
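
The hunk above threads a sixth boolean argument through every CompPhaseNameMacro entry. As a rough sketch of how such an X-macro table is consumed (illustrative only; the name chosen here for the new trailing flag, measureIR, is an assumption and is not stated in this diff):

    // Hypothetical consumers of the six-argument table in compphases.h.
    // Each expansion site defines CompPhaseNameMacro to pick out the columns
    // it needs and simply ignores the rest.
    enum Phases
    {
    #define CompPhaseNameMacro(enumNm, stringNm, shortNm, hasChildren, parent, measureIR) enumNm,
    #include "compphases.h"
    #undef CompPhaseNameMacro
        PHASE_NUMBER_OF
    };

    static const char* PhaseNames[] = {
    #define CompPhaseNameMacro(enumNm, stringNm, shortNm, hasChildren, parent, measureIR) stringNm,
    #include "compphases.h"
    #undef CompPhaseNameMacro
    };

A consumer whose macro still takes five parameters fails to compile against the six-argument table, which is why every entry had to be updated in one pass.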
diff --git a/src/jit/crossgen/CMakeLists.txt b/src/jit/crossgen/CMakeLists.txt
index 6440e91a04..4d49a319b8 100644
--- a/src/jit/crossgen/CMakeLists.txt
+++ b/src/jit/crossgen/CMakeLists.txt
@@ -4,4 +4,4 @@ if(CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DLEGACY_BACKEND)
endif()
-add_library_clr(clrjit_crossgen ${SOURCES})
+add_library_clr(clrjit_crossgen ${SOURCES} ${JIT_ARCH_SOURCES})
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 407ae1c35b..d284c1cb47 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -265,7 +265,7 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
default:
{
JITDUMP("Illegal TYP_LONG node %s in Decomposition.", GenTree::NodeName(tree->OperGet()));
- noway_assert(!"Illegal TYP_LONG node in Decomposition.");
+ assert(!"Illegal TYP_LONG node in Decomposition.");
break;
}
}
@@ -580,6 +580,8 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
srcType = genUnsignedType(srcType);
}
+ bool skipDecomposition = false;
+
if (varTypeIsLong(srcType))
{
if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType)))
@@ -605,9 +607,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
hiResult->gtFlags &= ~GTF_UNSIGNED;
hiResult->gtOp.gtOp1 = hiSrcOp;
- Range().Remove(cast);
Range().Remove(srcOp);
- Range().InsertAfter(hiSrcOp, hiResult);
}
else
{
@@ -634,13 +634,26 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
}
else
{
- if (varTypeIsUnsigned(srcType))
+ if (!use.IsDummyUse() && (use.User()->OperGet() == GT_MUL))
+ {
+ //
+ // This int->long cast is used by a GT_MUL that will be transformed by DecomposeMul into a
+ // GT_MUL_LONG and as a result the high operand produced by the cast will become dead.
+ // Skip cast decomposition so DecomposeMul doesn't need to bother with dead code removal,
+ // especially in the case of sign extending casts that also introduce new lclvars.
+ //
+
+ assert((use.User()->gtFlags & GTF_MUL_64RSLT) != 0);
+
+ skipDecomposition = true;
+ }
+ else if (varTypeIsUnsigned(srcType))
{
loResult = cast->gtGetOp1();
hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
+ Range().InsertAfter(cast, hiResult);
Range().Remove(cast);
- Range().InsertAfter(loResult, hiResult);
}
else
{
@@ -653,9 +666,10 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, loCopy, shiftBy);
- Range().Remove(cast);
- Range().InsertAfter(loResult, loCopy, shiftBy, hiResult);
+ Range().InsertAfter(cast, loCopy, shiftBy, hiResult);
m_compiler->lvaIncRefCnts(loCopy);
+
+ Range().Remove(cast);
}
}
}
@@ -664,6 +678,11 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
NYI("Unimplemented cast decomposition");
}
+ if (skipDecomposition)
+ {
+ return cast->gtNext;
+ }
+
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
@@ -994,15 +1013,25 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
assert(use.IsInitialized());
- GenTree* tree = use.Def();
- GenTree* gtLong = tree->gtGetOp1();
+ GenTree* shift = use.Def();
+ GenTree* gtLong = shift->gtGetOp1();
GenTree* loOp1 = gtLong->gtGetOp1();
GenTree* hiOp1 = gtLong->gtGetOp2();
- GenTree* shiftByOp = tree->gtGetOp2();
+ GenTree* shiftByOp = shift->gtGetOp2();
- genTreeOps oper = tree->OperGet();
+ genTreeOps oper = shift->OperGet();
genTreeOps shiftByOper = shiftByOp->OperGet();
+ // tLo = ...
+ // ...
+ // tHi = ...
+ // ...
+ // tLong = long tLo, tHi
+ // ...
+ // tShiftAmount = ...
+ // ...
+ // tShift = shift tLong, tShiftAmount
+
assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
// If we are shifting by a constant int, we do not want to use a helper, instead, we decompose.
@@ -1013,9 +1042,9 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
if (count == 0)
{
- GenTree* next = tree->gtNext;
- // Remove tree and don't do anything else.
- Range().Remove(tree);
+ GenTree* next = shift->gtNext;
+ // Remove shift and don't do anything else.
+ Range().Remove(shift);
use.ReplaceWith(m_compiler, gtLong);
return next;
}
@@ -1029,15 +1058,27 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
case GT_LSH:
{
- Range().Remove(hiOp1);
if (count < 32)
{
- // Hi is a GT_LSH_HI, lo is a GT_LSH. Will produce:
- // reg1 = lo
- // shl lo, shift
- // shld hi, reg1, shift
+ // For shifts of < 32 bits, we transform the code to:
+ //
+ // tLo = ...
+ // st.lclVar vLo, tLo
+ // ...
+ // tHi = ...
+ // ...
+ // tShiftLo = lsh vLo, tShiftAmountLo
+ // tShiftHiLong = long vLo, tHi
+ // tShiftHi = lsh_hi tShiftHiLong, tShiftAmountHi
+ //
+ // This will produce:
+ //
+ // reg1 = lo
+ // shl lo, shift
+ // shld hi, reg1, shift
Range().Remove(gtLong);
+
loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
unsigned loOp1LclNum = loOp1->AsLclVarCommon()->gtLclNum;
Range().Remove(loOp1);
@@ -1055,16 +1096,25 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
m_compiler->lvaIncRefCnts(loCopy);
- Range().InsertBefore(tree, loCopy, hiOp1, hiOp);
- Range().InsertBefore(tree, shiftByHi, hiResult);
- Range().InsertBefore(tree, loOp1, shiftByLo, loResult);
+ Range().InsertBefore(shift, loOp1, shiftByLo, loResult);
+ Range().InsertBefore(shift, loCopy, hiOp, shiftByHi, hiResult);
- insertAfter = loResult;
+ insertAfter = hiResult;
}
else
{
assert(count >= 32);
+ // Since we're left shifting at least 32 bits, we can remove the hi part of the shifted value iff
+ // it has no side effects.
+ //
+ // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything that
+ // feeds the hi operand while there are no side effects)
+ if ((hiOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ Range().Remove(hiOp1);
+ }
+
if (count < 64)
{
if (count == 32)
@@ -1083,7 +1133,6 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
else
{
Range().Remove(gtLong);
- Range().Remove(loOp1);
assert(count > 32 && count < 64);
// Move loOp1 into hiResult, do a GT_LSH with count - 32.
@@ -1091,23 +1140,33 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
// temp
GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
hiResult = m_compiler->gtNewOperNode(oper, TYP_INT, loOp1, shiftBy);
- Range().InsertBefore(tree, loOp1, shiftBy, hiResult);
+ Range().InsertBefore(shift, shiftBy, hiResult);
}
}
else
{
- Range().Remove(gtLong);
- Range().Remove(loOp1);
assert(count >= 64);
+ Range().Remove(gtLong);
+
+ // Since we're left shifting at least 64 bits, we can remove the lo part of the shifted value
+ // iff it has no side effects.
+ //
+ // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything
+ // that feeds the lo operand while there are no side effects)
+ if ((loOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ Range().Remove(loOp1);
+ }
+
// Zero out hi (shift of >= 64 bits moves all the bits out of the two registers)
hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
- Range().InsertBefore(tree, hiResult);
+ Range().InsertBefore(shift, hiResult);
}
// Zero out loResult (shift of >= 32 bits shifts all lo bits to hiResult)
loResult = m_compiler->gtNewZeroConNode(TYP_INT);
- Range().InsertBefore(tree, loResult);
+ Range().InsertBefore(shift, loResult);
insertAfter = loResult;
}
@@ -1140,14 +1199,22 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
- Range().InsertBefore(tree, hiCopy, loOp);
- Range().InsertBefore(tree, shiftByLo, loResult);
- Range().InsertBefore(tree, shiftByHi, hiResult);
+ Range().InsertBefore(shift, hiCopy, loOp);
+ Range().InsertBefore(shift, shiftByLo, loResult);
+ Range().InsertBefore(shift, shiftByHi, hiResult);
}
else
{
- Range().Remove(loOp1);
- Range().Remove(hiOp1);
+ // Since we're right shifting at least 32 bits, we can remove the lo part of the shifted value iff
+ // it has no side effects.
+ //
+ // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything that
+ // feeds the lo operand while there are no side effects)
+ if ((loOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ Range().Remove(loOp1);
+ }
+
assert(count >= 32);
if (count < 64)
{
@@ -1155,7 +1222,6 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
{
// Move hiOp1 into loResult.
loResult = hiOp1;
- Range().InsertBefore(tree, loResult);
}
else
{
@@ -1164,21 +1230,31 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
// Move hiOp1 into loResult, do a GT_RSZ with count - 32.
GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
- Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ Range().InsertBefore(shift, shiftBy, loResult);
}
}
else
{
assert(count >= 64);
+ // Since we're right shifting at least 64 bits, we can remove the hi part of the shifted value
+ // iff it has no side effects.
+ //
+ // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything
+ // that feeds the hi operand while there are no side effects)
+ if ((hiOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ Range().Remove(hiOp1);
+ }
+
// Zero out lo
loResult = m_compiler->gtNewZeroConNode(TYP_INT);
- Range().InsertBefore(tree, loResult);
+ Range().InsertBefore(shift, loResult);
}
// Zero out hi
hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
- Range().InsertBefore(tree, hiResult);
+ Range().InsertBefore(shift, hiResult);
}
insertAfter = hiResult;
@@ -1187,7 +1263,6 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
case GT_RSH:
{
Range().Remove(gtLong);
- Range().Remove(loOp1);
hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
unsigned hiOp1LclNum = hiOp1->AsLclVarCommon()->gtLclNum;
@@ -1212,20 +1287,31 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
GenTree* loOp = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loOp1, hiCopy);
loResult = m_compiler->gtNewOperNode(GT_RSH_LO, TYP_INT, loOp, shiftByLo);
- Range().InsertBefore(tree, loOp1, hiCopy, loOp);
- Range().InsertBefore(tree, shiftByLo, loResult);
- Range().InsertBefore(tree, shiftByHi, hiOp1, hiResult);
+ Range().InsertBefore(shift, hiCopy, loOp);
+ Range().InsertBefore(shift, shiftByLo, loResult);
+ Range().InsertBefore(shift, shiftByHi, hiOp1, hiResult);
}
else
{
assert(count >= 32);
+
+ // Since we're right shifting at least 32 bits, we can remove the lo part of the shifted value iff
+ // it has no side effects.
+ //
+ // TODO-CQ: we could go perform this removal transitively (i.e. iteratively remove everything that
+ // feeds the lo operand while there are no side effects)
+ if ((loOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ Range().Remove(loOp1);
+ }
+
if (count < 64)
{
if (count == 32)
{
// Move hiOp1 into loResult.
loResult = hiOp1;
- Range().InsertBefore(tree, loResult);
+ Range().InsertBefore(shift, loResult);
}
else
{
@@ -1234,13 +1320,13 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
// Move hiOp1 into loResult, do a GT_RSH with count - 32.
GenTree* shiftBy = m_compiler->gtNewIconNode(count - 32, TYP_INT);
loResult = m_compiler->gtNewOperNode(oper, TYP_INT, hiOp1, shiftBy);
- Range().InsertBefore(tree, hiOp1, shiftBy, loResult);
+ Range().InsertBefore(shift, hiOp1, shiftBy, loResult);
}
// Propagate sign bit in hiResult
GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, shiftBy);
- Range().InsertBefore(tree, shiftBy, hiCopy, hiResult);
+ Range().InsertBefore(shift, shiftBy, hiCopy, hiResult);
m_compiler->lvaIncRefCnts(hiCopy);
}
@@ -1251,12 +1337,12 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
// Propagate sign bit in loResult
GenTree* loShiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
loResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiCopy, loShiftBy);
- Range().InsertBefore(tree, hiCopy, loShiftBy, loResult);
+ Range().InsertBefore(shift, hiCopy, loShiftBy, loResult);
// Propagate sign bit in hiResult
GenTree* shiftBy = m_compiler->gtNewIconNode(31, TYP_INT);
hiResult = m_compiler->gtNewOperNode(GT_RSH, TYP_INT, hiOp1, shiftBy);
- Range().InsertBefore(tree, shiftBy, hiOp1, hiResult);
+ Range().InsertBefore(shift, shiftBy, hiOp1, hiResult);
m_compiler->lvaIncRefCnts(hiCopy);
}
@@ -1269,15 +1355,16 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
unreached();
}
- // Remove tree from Range
- Range().Remove(tree);
+ // Remove shift from Range
+ Range().Remove(shift);
return FinalizeDecomposition(use, loResult, hiResult, insertAfter);
}
else
{
- // arguments are single used, but LIR call can work only with local vars.
- shiftByOp = RepresentOpAsLocalVar(shiftByOp, tree, &tree->gtOp.gtOp2);
+ // Because calls must be created as HIR and lowered to LIR, we need to dump
+ // any LIR temps into lclVars before using them as arguments.
+ shiftByOp = RepresentOpAsLocalVar(shiftByOp, shift, &shift->gtOp.gtOp2);
loOp1 = RepresentOpAsLocalVar(loOp1, gtLong, &gtLong->gtOp.gtOp1);
hiOp1 = RepresentOpAsLocalVar(hiOp1, gtLong, &gtLong->gtOp.gtOp2);
@@ -1306,16 +1393,16 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftByOp);
GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
- call->gtFlags |= tree->gtFlags & GTF_ALL_EFFECT;
+ call->gtFlags |= shift->gtFlags & GTF_ALL_EFFECT;
GenTreeCall* callNode = call->AsCall();
ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
retTypeDesc->InitializeLongReturnType(m_compiler);
call = m_compiler->fgMorphArgs(callNode);
- Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+ Range().InsertAfter(shift, LIR::SeqTree(m_compiler, call));
- Range().Remove(tree);
+ Range().Remove(shift);
use.ReplaceWith(m_compiler, call);
return call;
}
@@ -1486,19 +1573,16 @@ GenTree* DecomposeLongs::DecomposeMul(LIR::Use& use)
GenTree* op1 = tree->gtGetOp1();
GenTree* op2 = tree->gtGetOp2();
- GenTree* loOp1 = op1->gtGetOp1();
- GenTree* hiOp1 = op1->gtGetOp2();
- GenTree* loOp2 = op2->gtGetOp1();
- GenTree* hiOp2 = op2->gtGetOp2();
+ // We expect both operands to be int->long casts. DecomposeCast specifically
+ // ignores such casts when they are used by GT_MULs.
+ assert((op1->OperGet() == GT_CAST) && (op1->TypeGet() == TYP_LONG));
+ assert((op2->OperGet() == GT_CAST) && (op2->TypeGet() == TYP_LONG));
- Range().Remove(hiOp1);
- Range().Remove(hiOp2);
Range().Remove(op1);
Range().Remove(op2);
- // Get rid of the hi ops. We don't need them.
- tree->gtOp.gtOp1 = loOp1;
- tree->gtOp.gtOp2 = loOp2;
+ tree->gtOp.gtOp1 = op1->gtGetOp1();
+ tree->gtOp.gtOp2 = op2->gtGetOp1();
tree->SetOperRaw(GT_MUL_LONG);
return StoreNodeToVar(use);
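
For reference, the values that the constant-shift decomposition above encodes in LIR can be written directly on 32-bit halves. This is an illustrative sketch in plain C++ (not JIT code) of the GT_LSH case, mirroring the count ranges handled above:

    #include <cstdint>

    // Value semantics of the decomposed 64-bit left shift: the JIT emits
    // GT_LSH/GT_LSH_HI (or plain moves and zero constants) so that the lo/hi
    // results match this computation for a constant shift count.
    static void DecomposedLsh(uint32_t lo, uint32_t hi, unsigned count, uint32_t* loOut, uint32_t* hiOut)
    {
        if (count == 0)
        {
            *loOut = lo; // the shift is a no-op; the GT_LONG is reused directly
            *hiOut = hi;
        }
        else if (count < 32)
        {
            *loOut = lo << count;                          // GT_LSH on the lo half
            *hiOut = (hi << count) | (lo >> (32 - count)); // GT_LSH_HI: hi shifted, plus the bits carried out of lo
        }
        else if (count < 64)
        {
            *loOut = 0;                  // all lo bits move out of the lo register
            *hiOut = lo << (count - 32); // lo becomes the hi result (count == 32 is a plain move)
        }
        else
        {
            *loOut = 0; // a shift of >= 64 bits moves everything out of both registers
            *hiOut = 0;
        }
    }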
diff --git a/src/jit/dll/CMakeLists.txt b/src/jit/dll/CMakeLists.txt
index 43ed07eae5..6d247fe0d0 100644
--- a/src/jit/dll/CMakeLists.txt
+++ b/src/jit/dll/CMakeLists.txt
@@ -4,32 +4,18 @@ if(CLR_CMAKE_TARGET_ARCH_ARM)
add_definitions(-DLEGACY_BACKEND)
endif(CLR_CMAKE_TARGET_ARCH_ARM)
-# Disable the following for UNIX altjit on Windows
if(CLR_CMAKE_PLATFORM_UNIX)
add_compile_options(-fPIC)
add_library_clr(clrjit_static
STATIC
${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_SOURCES}
)
add_dependencies(clrjit_static coreclrpal gcinfo)
else()
add_library_clr(clrjit_static
- ${SOURCES}
+ ${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_SOURCES}
)
-# Disable up to here (see above) the following for UNIX altjit on Windows
-# Enable the following for UNIX altjit on Windows
-# add_library_clr(ClrJit
-# SHARED
-# ${SHARED_LIB_SOURCES}
-# )
-
-# Enable the following for UNIX altjit on Windows
-#target_link_libraries(ClrJit
-# utilcode
-# gcinfo
-# runtime_library
-# )
-
-# Disable the following for UNIX altjit on Windows
endif(CLR_CMAKE_PLATFORM_UNIX)
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index d5705ab353..c0384f3858 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -20,7 +20,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
#include "corexcep.h"
-#if !defined(PLATFORM_UNIX)
+#if !defined(_HOST_UNIX_)
#include <io.h> // For _dup, _setmode
#include <fcntl.h> // For _O_TEXT
#include <errno.h> // For EINVAL
@@ -66,9 +66,9 @@ extern "C" void __stdcall jitStartup(ICorJitHost* jitHost)
assert(!JitConfig.isInitialized());
JitConfig.initialize(jitHost);
-#if defined(PLATFORM_UNIX)
+#if defined(_HOST_UNIX_)
jitstdout = procstdout();
-#else
+#else // !_HOST_UNIX_
if (jitstdout == nullptr)
{
int stdoutFd = _fileno(procstdout());
@@ -99,7 +99,7 @@ extern "C" void __stdcall jitStartup(ICorJitHost* jitHost)
{
jitstdout = procstdout();
}
-#endif // PLATFORM_UNIX
+#endif // !_HOST_UNIX_
#ifdef FEATURE_TRACELOGGING
JitTelemetry::NotifyDllProcessAttach();
@@ -136,9 +136,6 @@ extern "C" BOOL WINAPI DllMain(HANDLE hInstance, DWORD dwReason, LPVOID pvReserv
{
g_hInst = (HINSTANCE)hInstance;
DisableThreadLibraryCalls((HINSTANCE)hInstance);
-#if defined(SELF_NO_HOST) && COR_JIT_EE_VERSION <= 460
- jitStartup(JitHost::getJitHost());
-#endif
}
else if (dwReason == DLL_PROCESS_DETACH)
{
@@ -158,10 +155,6 @@ extern "C" void __stdcall sxsJitStartup(CoreClrCallbacks const& cccallbacks)
#ifndef SELF_NO_HOST
InitUtilcode(cccallbacks);
#endif
-
-#if COR_JIT_EE_VERSION <= 460
- jitStartup(JitHost::getJitHost());
-#endif
}
#endif // !FEATURE_MERGE_JIT_AND_ENGINE
@@ -286,15 +279,11 @@ CorJitResult CILJit::compileMethod(
JitFlags jitFlags;
-#if COR_JIT_EE_VERSION > 460
assert(flags == CORJIT_FLAGS::CORJIT_FLAG_CALL_GETJITFLAGS);
CORJIT_FLAGS corJitFlags;
DWORD jitFlagsSize = compHnd->getJitFlags(&corJitFlags, sizeof(corJitFlags));
assert(jitFlagsSize == sizeof(corJitFlags));
jitFlags.SetFromFlags(corJitFlags);
-#else // COR_JIT_EE_VERSION <= 460
- jitFlags.SetFromOldFlags(flags, 0);
-#endif // COR_JIT_EE_VERSION <= 460
int result;
void* methodCodePtr = nullptr;
@@ -382,11 +371,7 @@ void CILJit::getVersionIdentifier(GUID* versionIdentifier)
* Determine the maximum length of SIMD vector supported by this JIT.
*/
-#if COR_JIT_EE_VERSION > 460
unsigned CILJit::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags)
-#else
-unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
-#endif
{
if (g_realJitCompiler != nullptr)
{
@@ -394,12 +379,7 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
}
JitFlags jitFlags;
-
-#if COR_JIT_EE_VERSION > 460
jitFlags.SetFromFlags(cpuCompileFlags);
-#else // COR_JIT_EE_VERSION <= 460
- jitFlags.SetFromOldFlags(cpuCompileFlags, 0);
-#endif // COR_JIT_EE_VERSION <= 460
#ifdef FEATURE_SIMD
#ifdef _TARGET_XARCH_
@@ -409,16 +389,25 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
{
if (JitConfig.EnableAVX() != 0)
{
- JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 32\n");
+ if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr)
+ {
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 32\n");
+ }
return 32;
}
}
#endif // FEATURE_AVX_SUPPORT
- JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 16\n");
+ if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr)
+ {
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 16\n");
+ }
return 16;
#endif // _TARGET_XARCH_
#else // !FEATURE_SIMD
- JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 0\n");
+ if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr)
+ {
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 0\n");
+ }
return 0;
#endif // !FEATURE_SIMD
}
@@ -1235,146 +1224,6 @@ void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
-#if COR_JIT_EE_VERSION <= 460
-
-// Validate the token to determine whether to turn the bad image format exception into
-// verification failure (for backward compatibility)
-static bool isValidTokenForTryResolveToken(ICorJitInfo* corInfo, CORINFO_RESOLVED_TOKEN* resolvedToken)
-{
- if (!corInfo->isValidToken(resolvedToken->tokenScope, resolvedToken->token))
- return false;
-
- CorInfoTokenKind tokenType = resolvedToken->tokenType;
- switch (TypeFromToken(resolvedToken->token))
- {
- case mdtModuleRef:
- case mdtTypeDef:
- case mdtTypeRef:
- case mdtTypeSpec:
- if ((tokenType & CORINFO_TOKENKIND_Class) == 0)
- return false;
- break;
-
- case mdtMethodDef:
- case mdtMethodSpec:
- if ((tokenType & CORINFO_TOKENKIND_Method) == 0)
- return false;
- break;
-
- case mdtFieldDef:
- if ((tokenType & CORINFO_TOKENKIND_Field) == 0)
- return false;
- break;
-
- case mdtMemberRef:
- if ((tokenType & (CORINFO_TOKENKIND_Method | CORINFO_TOKENKIND_Field)) == 0)
- return false;
- break;
-
- default:
- return false;
- }
-
- return true;
-}
-
-// This type encapsulates the information necessary for `TryResolveTokenFilter` and
-// `eeTryResolveToken` below.
-struct TryResolveTokenFilterParam
-{
- ICorJitInfo* m_corInfo;
- CORINFO_RESOLVED_TOKEN* m_resolvedToken;
- EXCEPTION_POINTERS m_exceptionPointers;
- bool m_success;
-};
-
-LONG TryResolveTokenFilter(struct _EXCEPTION_POINTERS* exceptionPointers, void* theParam)
-{
- assert(exceptionPointers->ExceptionRecord->ExceptionCode != SEH_VERIFICATION_EXCEPTION);
-
- // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to
- // verification exceptions if we are verifying. Verification exceptions will cause the JIT of the basic block to
- // fail, but the JITing of the whole method is still going to succeed. This is done for backward compatibility only.
- // Ideally, we would always treat bad tokens in the IL stream as fatal errors.
- if (exceptionPointers->ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
- {
- auto* param = reinterpret_cast<TryResolveTokenFilterParam*>(theParam);
- if (!isValidTokenForTryResolveToken(param->m_corInfo, param->m_resolvedToken))
- {
- param->m_exceptionPointers = *exceptionPointers;
- return param->m_corInfo->FilterException(exceptionPointers);
- }
- }
-
- return EXCEPTION_CONTINUE_SEARCH;
-}
-
-bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
-{
- TryResolveTokenFilterParam param;
- param.m_corInfo = info.compCompHnd;
- param.m_resolvedToken = resolvedToken;
- param.m_success = true;
-
- PAL_TRY(TryResolveTokenFilterParam*, pParam, &param)
- {
- pParam->m_corInfo->resolveToken(pParam->m_resolvedToken);
- }
- PAL_EXCEPT_FILTER(TryResolveTokenFilter)
- {
- if (param.m_exceptionPointers.ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
- {
- param.m_corInfo->HandleException(&param.m_exceptionPointers);
- }
-
- param.m_success = false;
- }
- PAL_ENDTRY
-
- return param.m_success;
-}
-
-struct TrapParam
-{
- ICorJitInfo* m_corInfo;
- EXCEPTION_POINTERS m_exceptionPointers;
-
- void (*m_function)(void*);
- void* m_param;
- bool m_success;
-};
-
-static LONG __EEFilter(PEXCEPTION_POINTERS exceptionPointers, void* param)
-{
- auto* trapParam = reinterpret_cast<TrapParam*>(param);
- trapParam->m_exceptionPointers = *exceptionPointers;
- return trapParam->m_corInfo->FilterException(exceptionPointers);
-}
-
-bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
-{
- TrapParam trapParam;
- trapParam.m_corInfo = info.compCompHnd;
- trapParam.m_function = function;
- trapParam.m_param = param;
- trapParam.m_success = true;
-
- PAL_TRY(TrapParam*, __trapParam, &trapParam)
- {
- __trapParam->m_function(__trapParam->m_param);
- }
- PAL_EXCEPT_FILTER(__EEFilter)
- {
- trapParam.m_corInfo->HandleException(&trapParam.m_exceptionPointers);
- trapParam.m_success = false;
- }
- PAL_ENDTRY
-
- return trapParam.m_success;
-}
-
-#else // CORJIT_EE_VER <= 460
-
bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
{
return info.compCompHnd->tryResolveToken(resolvedToken);
@@ -1385,8 +1234,6 @@ bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
return info.compCompHnd->runWithErrorTrap(function, param);
}
-#endif // CORJIT_EE_VER > 460
-
/*****************************************************************************
*
* Utility functions
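
A note on the getMaxIntrinsicSIMDVectorLength change above: JITDUMP is only safe to call while a Compiler instance is installed in the JIT's thread-local state, and the EE can issue this query outside of any compilation. In DEBUG builds the macro expands to roughly the following (the exact body is an assumption based on the JIT's conventions, not something shown in this diff), which is why the new GetJitTls()/GetCompiler() guards are needed:

    // Assumed shape of the DEBUG-only dump macro: it dereferences the
    // thread-local Compiler before printing anything.
    #define JITDUMP(...)                                                        \
        {                                                                       \
            if (JitTls::GetCompiler()->verbose)                                 \
                logf(__VA_ARGS__);                                              \
        }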
diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp
index 3899d92192..0565d6f561 100644
--- a/src/jit/ee_il_dll.hpp
+++ b/src/jit/ee_il_dll.hpp
@@ -21,11 +21,7 @@ class CILJit : public ICorJitCompiler
void getVersionIdentifier(GUID* versionIdentifier /* OUT */
);
-#if COR_JIT_EE_VERSION > 460
unsigned getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags);
-#else
- unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags);
-#endif
void setRealJit(ICorJitCompiler* realJitCompiler);
};
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 1e566b2e76..3b765b9db2 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -1027,7 +1027,7 @@ void emitter::emitBegFN(bool hasFramePtr
emitPlaceholderList = emitPlaceholderLast = nullptr;
#ifdef JIT32_GCENCODER
- emitEpilogList = emitEpilogLast = NULL;
+ emitEpilogList = emitEpilogLast = nullptr;
#endif // JIT32_GCENCODER
/* We don't have any jumps */
@@ -1215,14 +1215,12 @@ size_t emitter::emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp)
EpilogList* el;
size_t sz;
- for (el = emitEpilogList, sz = 0; el; el = el->elNext)
+ for (el = emitEpilogList, sz = 0; el != nullptr; el = el->elNext)
{
- assert(el->elIG->igFlags & IGF_EPILOG);
+ assert(el->elLoc.GetIG()->igFlags & IGF_EPILOG);
- UNATIVE_OFFSET ofs =
- el->elIG->igOffs; // The epilog starts at the beginning of the IG, so the IG offset is correct
-
- sz += fp(cp, ofs);
+ // The epilog starts at the location recorded in the epilog list.
+ sz += fp(cp, el->elLoc.CodeOffset(this));
}
return sz;
@@ -1383,7 +1381,6 @@ void* emitter::emitAllocInstr(size_t sz, emitAttr opsz)
id->idOpSize(EA_SIZE(opsz));
}
-#if RELOC_SUPPORT
// Amd64: ip-relative addressing is supported even when not generating relocatable ngen code
if (EA_IS_DSP_RELOC(opsz)
#ifndef _TARGET_AMD64_
@@ -1402,7 +1399,6 @@ void* emitter::emitAllocInstr(size_t sz, emitAttr opsz)
/* instruction has an immediate constant that is relocatable */
id->idSetIsCnsReloc();
}
-#endif
#if EMITTER_STATS
emitTotalInsCnt++;
@@ -1957,22 +1953,20 @@ void emitter::emitBegFnEpilog(insGroup* igPh)
#ifdef JIT32_GCENCODER
- EpilogList* el = new (emitComp, CMK_GC) EpilogList;
- el->elNext = NULL;
- el->elIG = emitCurIG;
+ EpilogList* el = new (emitComp, CMK_GC) EpilogList();
- if (emitEpilogLast)
+ if (emitEpilogLast != nullptr)
+ {
emitEpilogLast->elNext = el;
+ }
else
+ {
emitEpilogList = el;
+ }
emitEpilogLast = el;
#endif // JIT32_GCENCODER
-
- /* Remember current position so that we can compute total epilog size */
-
- emitEpilogBegLoc.CaptureLocation(this);
}
/*****************************************************************************
@@ -1984,22 +1978,17 @@ void emitter::emitEndFnEpilog()
{
emitEndPrologEpilog();
- UNATIVE_OFFSET newSize;
- UNATIVE_OFFSET epilogBegCodeOffset = emitEpilogBegLoc.CodeOffset(this);
-#ifdef _TARGET_XARCH_
- UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitExitSeqBegLoc.CodeOffset(this);
-#else
- UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
-#endif
-
- newSize = epilogExitSeqStartCodeOffset - epilogBegCodeOffset;
+#ifdef JIT32_GCENCODER
+ assert(emitEpilogLast != nullptr);
-#ifdef _TARGET_X86_
+ UNATIVE_OFFSET epilogBegCodeOffset = emitEpilogLast->elLoc.CodeOffset(this);
+ UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitExitSeqBegLoc.CodeOffset(this);
+ UNATIVE_OFFSET newSize = epilogExitSeqStartCodeOffset - epilogBegCodeOffset;
/* Compute total epilog size */
-
assert(emitEpilogSize == 0 || emitEpilogSize == newSize); // All epilogs must be identical
- emitEpilogSize = newSize;
+ emitEpilogSize = newSize;
+
UNATIVE_OFFSET epilogEndCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
assert(epilogExitSeqStartCodeOffset != epilogEndCodeOffset);
@@ -2019,8 +2008,7 @@ void emitter::emitEndFnEpilog()
);
emitExitSeqSize = newSize;
}
-
-#endif // _TARGET_X86_
+#endif // JIT32_GCENCODER
}
#if FEATURE_EH_FUNCLETS
@@ -2069,6 +2057,16 @@ void emitter::emitEndFuncletEpilog()
#ifdef JIT32_GCENCODER
+//
+// emitter::emitStartEpilog:
+// Mark the current position so that we can later compute the total epilog size.
+//
+void emitter::emitStartEpilog()
+{
+ assert(emitEpilogLast != nullptr);
+ emitEpilogLast->elLoc.CaptureLocation(this);
+}
+
/*****************************************************************************
*
* Return non-zero if the current method only has one epilog, which is
@@ -4233,7 +4231,7 @@ void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG)
// meets one of those criteria...
assert(jmp->idIsBound());
-#ifdef _TARGET_AMD64_
+#ifdef _TARGET_XARCH_
// An lea of a code address (for constant data stored with the code)
// is treated like a jump for emission purposes but is not really a jump so
// we don't have to check anything here.
@@ -4405,6 +4403,12 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
emitFullyInt = fullyInt;
emitFullGCinfo = fullPtrMap;
+#ifndef UNIX_X86_ABI
+ emitFullArgInfo = !emitHasFramePtr;
+#else
+ emitFullArgInfo = fullPtrMap;
+#endif
+
#if EMITTER_STATS
GCrefsTable.record(emitGCrFrameOffsCnt);
emitSizeTable.record(static_cast<unsigned>(emitSizeMethod));
@@ -4419,7 +4423,10 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
#if EMIT_TRACK_STACK_DEPTH
/* Convert max. stack depth from # of bytes to # of entries */
- emitMaxStackDepth /= sizeof(int);
+ unsigned maxStackDepthIn4ByteElements = emitMaxStackDepth / sizeof(int);
+ JITDUMP("Converting emitMaxStackDepth from bytes (%d) to elements (%d)\n", emitMaxStackDepth,
+ maxStackDepthIn4ByteElements);
+ emitMaxStackDepth = maxStackDepthIn4ByteElements;
/* Should we use the simple stack */
@@ -4499,7 +4506,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
//
if (emitComp->fgHaveProfileData())
{
- if (emitComp->fgCalledWeight > (BB_VERY_HOT_WEIGHT * emitComp->fgNumProfileRuns))
+ if (emitComp->fgCalledCount > (BB_VERY_HOT_WEIGHT * emitComp->fgProfileRunsCount()))
{
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
}
@@ -6814,7 +6821,7 @@ void emitter::emitStackPushLargeStk(BYTE* addr, GCtype gcType, unsigned count)
*u2.emitArgTrackTop++ = (BYTE)gcType;
assert(u2.emitArgTrackTop <= u2.emitArgTrackTab + emitMaxStackDepth);
- if (!emitHasFramePtr || needsGC(gcType))
+ if (emitFullArgInfo || needsGC(gcType))
{
if (emitFullGCinfo)
{
@@ -6886,7 +6893,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn
// This is an "interesting" argument
- if (!emitHasFramePtr || needsGC(gcType))
+ if (emitFullArgInfo || needsGC(gcType))
{
argRecCnt += 1;
}
@@ -7034,7 +7041,7 @@ void emitter::emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callIn
/* We're about to kill the corresponding (pointer) arg records */
- if (emitHasFramePtr)
+ if (!emitFullArgInfo)
{
u2.emitGcArgTrackCnt -= gcCnt.Value();
}
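
To tie the epilog-tracking changes above together: under JIT32_GCENCODER each epilog's start is now recorded in its EpilogList entry via the new emitStartEpilog, instead of being assumed to coincide with the start of the placeholder insGroup. A hedged sketch of the order in which these entry points now run for a single epilog (how they are driven from codegen is not part of this diff):

    // Assumed per-epilog sequence under JIT32_GCENCODER:
    emitBegFnEpilog(igPh);  // creates and links the EpilogList entry
    emitStartEpilog();      // records elLoc, the true start of the epilog code
    // ... the epilog instructions are generated here ...
    emitEndFnEpilog();      // epilog size = exit-sequence start offset - elLoc code offset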
diff --git a/src/jit/emit.h b/src/jit/emit.h
index f57cc0a0f7..e1c924f467 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -738,21 +738,13 @@ protected:
// arm64: 48 bits
CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef RELOC_SUPPORT
-
unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
#define ID_EXTRA_RELOC_BITS (2)
-#else // RELOC_SUPPORT
-
-#define ID_EXTRA_RELOC_BITS (0)
-
-#endif // RELOC_SUPPORT
-
////////////////////////////////////////////////////////////////////////
- // Space taken up to here (assuming RELOC_SUPPORT):
+ // Space taken up to here:
// x86: 40 bits
// amd64: 48 bits
// arm: 50 bits
@@ -768,7 +760,7 @@ protected:
#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
////////////////////////////////////////////////////////////////////////
- // Small constant size (assuming RELOC_SUPPORT):
+ // Small constant size:
// x86: 24 bits
// amd64: 16 bits
// arm: 14 bits
@@ -777,7 +769,7 @@ protected:
unsigned _idSmallCns : ID_BIT_SMALL_CNS;
////////////////////////////////////////////////////////////////////////
- // Space taken up to here (with RELOC_SUPPORT): 64 bits, all architectures, by design.
+ // Space taken up to here: 64 bits, all architectures, by design.
////////////////////////////////////////////////////////////////////////
CLANG_FORMAT_COMMENT_ANCHOR;
@@ -829,23 +821,13 @@ protected:
#define ID_EXTRA_BITFIELD_BITS (7)
-//
-// For x86, we are using 7 bits from the second DWORD for bitfields.
-//
-
-#ifdef RELOC_SUPPORT
+ //
+ // For x86, we are using 7 bits from the second DWORD for bitfields.
+ //
unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
-#define ID_EXTRA_RELOC_BITS (2)
-
-#else // RELOC_SUPPORT
-
-#define ID_EXTRA_RELOC_BITS (0)
-
-#endif // RELOC_SUPPORT
-
#define ID_EXTRA_REG_BITS (0)
#define ID_EXTRA_BITS (ID_EXTRA_BITFIELD_BITS + ID_EXTRA_RELOC_BITS + ID_EXTRA_REG_BITS)
@@ -856,7 +838,7 @@ protected:
#define ID_MIN_SMALL_CNS 0
#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
- // For x86 (assuming RELOC_SUPPORT) we have 23 bits remaining for the
+ // For x86 we have 23 bits remaining for the
// small constant in this extra DWORD.
unsigned _idSmallCns : ID_BIT_SMALL_CNS;
@@ -1283,8 +1265,6 @@ protected:
}
#endif // defined(_TARGET_ARM_)
-#ifdef RELOC_SUPPORT
-
bool idIsCnsReloc() const
{
assert(!idIsTiny());
@@ -1311,8 +1291,6 @@ protected:
return idIsDspReloc() || idIsCnsReloc();
}
-#endif
-
unsigned idSmallCns() const
{
assert(!idIsTiny());
@@ -1518,14 +1496,20 @@ protected:
// IG of the epilog, and use it to find the epilog offset at the end of code generation.
struct EpilogList
{
- EpilogList* elNext;
- insGroup* elIG;
+ EpilogList* elNext;
+ emitLocation elLoc;
+
+ EpilogList() : elNext(nullptr), elLoc()
+ {
+ }
};
EpilogList* emitEpilogList; // per method epilog list - head
EpilogList* emitEpilogLast; // per method epilog list - tail
public:
+ void emitStartEpilog();
+
bool emitHasEpilogEnd();
size_t emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp);
@@ -1535,8 +1519,6 @@ public:
void emitBegPrologEpilog(insGroup* igPh);
void emitEndPrologEpilog();
- emitLocation emitEpilogBegLoc;
-
void emitBegFnEpilog(insGroup* igPh);
void emitEndFnEpilog();
@@ -2036,8 +2018,9 @@ public:
/* The following logic keeps track of live GC ref values */
/************************************************************************/
- bool emitFullGCinfo; // full GC pointer maps?
- bool emitFullyInt; // fully interruptible code?
+ bool emitFullArgInfo; // full arg info (including non-ptr arg)?
+ bool emitFullGCinfo; // full GC pointer maps?
+ bool emitFullyInt; // fully interruptible code?
#if EMIT_TRACK_STACK_DEPTH
unsigned emitCntStackDepth; // 0 in prolog/epilog, One DWORD elsewhere
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index 1b3ef1bdc7..53ee88b3a2 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -1380,7 +1380,7 @@ DONE:
/*****************************************************************************
*
- * emitIns_valid_imm_for_add() returns true when the immediate 'imm'
+ * emitins_valid_imm_for_add() returns true when the immediate 'imm'
* can be encoded using a single add or sub instruction.
*/
/*static*/ bool emitter::emitIns_valid_imm_for_add(int imm, insFlags flags)
@@ -1396,6 +1396,20 @@ DONE:
/*****************************************************************************
*
+ * emitIns_valid_imm_for_cmp() returns true when the immediate 'imm'
+ * can be encoded as an input operand to a cmp instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_cmp(int imm, insFlags flags)
+{
+ if (isModImmConst(imm)) // funky arm immediate
+ return true;
+ if (isModImmConst(-imm)) // funky arm immediate via sub
+ return true;
+ return false;
+}
+
+/*****************************************************************************
+ *
* emitIns_valid_imm_for_add_sp() returns true when the immediate 'imm'
* can be encoded in "add Rd,SP,i10".
*/
@@ -1408,6 +1422,20 @@ DONE:
/*****************************************************************************
*
+ * emitIns_valid_imm_for_ldst_offset() returns true when the immediate 'imm'
+ * can be encoded as the offset in a ldr/str instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(int imm, emitAttr size)
+{
+ if ((imm & 0x0fff) == imm)
+ return true; // encodable using IF_T2_K1
+ if (unsigned_abs(imm) <= 0x0ff)
+ return true; // encodable using IF_T2_H0
+ return false;
+}
+
+/*****************************************************************************
+ *
* Add an instruction with no operands.
*/
@@ -4289,14 +4317,12 @@ void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumb
id->idInsFmt(fmt);
id->idInsSize(isz);
-#if RELOC_SUPPORT
if (emitComp->opts.compReloc)
{
// Set the relocation flags - these give hint to zap to perform
// relocation of the specified 32bit address.
id->idSetRelocFlags(attr);
}
-#endif // RELOC_SUPPORT
dispIns(id);
appendToCurIG(id);
@@ -4579,7 +4605,6 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idSetIsCallAddr();
}
-#if RELOC_SUPPORT
if (emitComp->opts.compReloc)
{
// Since this is an indirect call through a pointer and we don't
@@ -4588,7 +4613,6 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idSetIsDspReloc();
}
-#endif
}
#ifdef DEBUG
@@ -5254,7 +5278,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
else if (fmt == IF_T2_J2)
{
assert((distVal & 1) == 0);
-#ifdef RELOC_SUPPORT
if (emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
{
// dst isn't an actual final target location, just some intermediate
@@ -5263,7 +5286,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
// rely on the relocation to do all the work
}
else
-#endif
{
assert(distVal >= CALL_DIST_MAX_NEG);
assert(distVal <= CALL_DIST_MAX_POS);
@@ -5290,7 +5312,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
unsigned instrSize = emitOutput_Thumb2Instr(dst, code);
-#ifdef RELOC_SUPPORT
if (emitComp->opts.compReloc)
{
if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
@@ -5303,7 +5324,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
}
}
}
-#endif // RELOC_SUPPORT
dst += instrSize;
}
@@ -5968,9 +5988,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
assert(!id->idIsLclVar());
assert((ins == INS_movw) || (ins == INS_movt));
imm += (size_t)emitConsBlock;
-#ifdef RELOC_SUPPORT
if (!id->idIsCnsReloc() && !id->idIsDspReloc())
-#endif
{
goto SPLIT_IMM;
}
@@ -5988,7 +6006,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
}
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc() || id->idIsDspReloc())
{
assert((ins == INS_movt) || (ins == INS_movw));
@@ -5997,7 +6014,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
emitRecordRelocation((void*)(dst - 8), (void*)imm, IMAGE_REL_BASED_THUMB_MOV32);
}
else
-#endif // RELOC_SUPPORT
{
assert((imm & 0x0000ffff) == imm);
code |= (imm & 0x00ff);
@@ -6220,7 +6236,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
code = emitInsCode(ins, fmt);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
@@ -6229,7 +6244,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
emitRecordRelocation((void*)(dst - 4), addr, IMAGE_REL_BASED_THUMB_BRANCH24);
}
else
-#endif // RELOC_SUPPORT
{
addr = (BYTE*)((size_t)addr & ~1); // Clear the lowest bit from target address
@@ -6935,14 +6949,12 @@ void emitter::emitDispInsHelp(
{
if (emitComp->opts.disDiffable)
imm = 0xD1FF;
-#if RELOC_SUPPORT
if (id->idIsCnsReloc() || id->idIsDspReloc())
{
if (emitComp->opts.disDiffable)
imm = 0xD1FFAB1E;
printf("%s RELOC ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
}
-#endif // RELOC_SUPPORT
}
emitDispImm(imm, false, (fmt == IF_T2_N));
break;
@@ -6973,12 +6985,10 @@ void emitter::emitDispInsHelp(
assert(jdsc != NULL);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
printf("reloc ");
}
-#endif
printf("%s ADDRESS J_M%03u_DS%02u", (id->idIns() == INS_movw) ? "LOW" : "HIGH",
Compiler::s_compMethodsCount, imm);
@@ -7528,89 +7538,115 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
#ifndef LEGACY_BACKEND
-// this is very similar to emitInsBinary and probably could be folded in to same
-// except the requirements on the incoming parameter are different,
-// ex: the memory op in storeind case must NOT be contained
-void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
{
- switch (node->OperGet())
+ GenTree* addr = indir->Addr();
+ GenTree* data = indir->gtOp.gtOp2;
+
+ if (addr->isContained())
{
- case GT_IND:
- case GT_STOREIND:
- {
- GenTreeIndir* indir = node->AsIndir();
- GenTree* addr = indir->Addr();
- GenTree* data = indir->gtOp.gtOp2;
+ assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
- regNumber reg = (node->OperGet() == GT_IND) ? node->gtRegNum : data->gtRegNum;
+ int offset = 0;
+ DWORD lsl = 0;
- if (addr->isContained())
+ if (addr->OperGet() == GT_LEA)
+ {
+ offset = (int)addr->AsAddrMode()->gtOffset;
+ if (addr->AsAddrMode()->gtScale > 0)
{
- assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+ assert(isPow2(addr->AsAddrMode()->gtScale));
+ BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ }
+ }
+
+ GenTree* memBase = indir->Base();
+
+ if (indir->HasIndex())
+ {
+ GenTree* index = indir->Index();
- int offset = 0;
- DWORD lsl = 0;
+ if (offset != 0)
+ {
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
- if (addr->OperGet() == GT_LEA)
+ if (emitIns_valid_imm_for_add(offset, INS_FLAGS_DONT_CARE))
{
- offset = (int)addr->AsAddrMode()->gtOffset;
- if (addr->AsAddrMode()->gtScale > 0)
+ if (lsl > 0)
{
- assert(isPow2(addr->AsAddrMode()->gtScale));
- BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ // Generate code to set tmpReg = base + index*scale
+ emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Generate code to set tmpReg = base + index
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
}
- }
- GenTree* memBase = indir->Base();
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
- if (indir->HasIndex())
- {
- NYI_ARM("emitInsMov HasIndex");
+ // Then load/store dataReg from/to [tmpReg + offset]
+ emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset);
}
- else
+ else // large offset
{
- // TODO check offset is valid for encoding
- emitIns_R_R_I(ins, attr, reg, memBase->gtRegNum, offset);
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+ // Then add the base register
+ // rd = rd + base
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+ noway_assert(tmpReg != index->gtRegNum);
+
+ // Then load/store dataReg from/to [tmpReg + index*scale]
+ emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->gtRegNum, lsl, INS_FLAGS_DONT_CARE,
+ INS_OPTS_LSL);
}
}
- else
+ else // (offset == 0)
{
- if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ if (lsl > 0)
{
- emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ // Then load/store dataReg from/to [memBase + index*scale]
+ emitIns_R_R_R_I(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_FLAGS_DONT_CARE,
+ INS_OPTS_LSL);
}
- else
+ else // no scale
{
- emitIns_R_R(ins, attr, reg, addr->gtRegNum);
+ // Then load/store dataReg from/to [memBase + index]
+ emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum);
}
}
}
- break;
-
- case GT_STORE_LCL_VAR:
+ else // no Index
{
- GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
-
- GenTree* data = node->gtOp.gtOp1->gtEffectiveVal();
- codeGen->inst_set_SV_var(varNode);
- assert(varNode->gtRegNum == REG_NA); // stack store
-
- if (data->isContainedIntOrIImmed())
+ if (emitIns_valid_imm_for_ldst_offset(offset, attr))
{
- emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
- codeGen->genUpdateLife(varNode);
+ // Then load/store dataReg from/to [memBase + offset]
+ emitIns_R_R_I(ins, attr, dataReg, memBase->gtRegNum, offset);
}
else
{
- assert(!data->isContained());
- emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
- codeGen->genUpdateLife(varNode);
+ // We require a tmpReg to hold the offset
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+ // Then load/store dataReg from/to [memBase + tmpReg]
+ emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, tmpReg);
}
}
- return;
-
- default:
- unreached();
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dataReg, addr->gtRegNum);
}
}
@@ -7646,5 +7682,174 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
}
}
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // find immed (if any) - it cannot be a dst
+ // Only one src can be an int.
+ GenTreeIntConCommon* intConst = nullptr;
+ GenTree* nonIntReg = nullptr;
+
+ if (varTypeIsFloating(dst))
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ // src2 can only be a reg
+ assert(!src2->isContained());
+ }
+ else // not floating point
+ {
+ // src2 can be immed or reg
+ assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+
+ // Check src2 first as we can always allow it to be a contained immediate
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ nonIntReg = src1;
+ }
+ // Only for commutative operations do we check src1 and allow it to be a contained immediate
+ else if (dst->OperIsCommutative())
+ {
+ // src1 can be immed or reg
+ assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+
+ // Check src1 and allow it to be a contained immediate
+ if (src1->isContainedIntOrIImmed())
+ {
+ assert(!src2->isContainedIntOrIImmed());
+ intConst = src1->AsIntConCommon();
+ nonIntReg = src2;
+ }
+ }
+ else
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ }
+ }
+ bool isMulOverflow = false;
+ bool isUnsignedMul = false;
+ regNumber extraReg = REG_NA;
+ if (dst->gtOverflowEx())
+ {
+ NYI_ARM("emitInsTernary overflow");
+#if 0
+ if (ins == INS_add)
+ {
+ ins = INS_adds;
+ }
+ else if (ins == INS_sub)
+ {
+ ins = INS_subs;
+ }
+ else if (ins == INS_mul)
+ {
+ isMulOverflow = true;
+ isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
+ assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+ }
+ else
+ {
+ assert(!"Invalid ins for overflow check");
+ }
+#endif
+ }
+ if (intConst != nullptr)
+ {
+ emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ if (isMulOverflow)
+ {
+ NYI_ARM("emitInsTernary overflow");
+#if 0
+ // Make sure that we have an internal register
+ assert(genCountBits(dst->gtRsvdRegs) == 2);
+
+ // There will be two bits set in tmpRegsMask.
+ // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
+ regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
+ assert(tmpRegsMask != RBM_NONE);
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ extraReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+
+ if (isUnsignedMul)
+ {
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Compute the high result.
+ emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ // zero-sign bit comparison to detect overflow.
+ emitIns_R_I(INS_cmp, attr, extraReg, 0);
+ }
+ else
+ {
+ int bitShift = 0;
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+
+ bitShift = 31;
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Save the high result in a temporary register.
+ emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ bitShift = 63;
+ }
+
+ // Sign bit comparison to detect overflow.
+ emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
+ }
+#endif
+ }
+ else
+ {
+ // We can just multiply.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ }
+
+ if (dst->gtOverflowEx())
+ {
+ NYI_ARM("emitInsTernary overflow");
+#if 0
+ assert(!varTypeIsFloating(dst));
+ codeGen->genCheckOverflow(dst);
+#endif
+ }
+
+ return dst->gtRegNum;
+}
+
#endif // !LEGACY_BACKEND
#endif // defined(_TARGET_ARM_)
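
The new emitInsLoadStoreOp above is meant to be driven from the ARM32 RyuJIT codegen for indirections whose address is a contained GT_LCL_VAR_ADDR or GT_LEA. A rough sketch of such a call site (the helper names follow the RyuJIT backend's conventions, but this particular caller is an assumption, not part of the diff):

    // Hypothetical load path: pick a load instruction for the indirection's
    // type and let emitInsLoadStoreOp handle [base + index<<scale + offset].
    void CodeGen::genCodeForLoadIndir(GenTreeIndir* tree) // hypothetical helper
    {
        instruction ins  = ins_Load(tree->TypeGet());
        emitAttr    attr = emitTypeSize(tree->TypeGet());

        genConsumeAddress(tree->Addr());
        getEmitter()->emitInsLoadStoreOp(ins, attr, tree->gtRegNum, tree);
        genProduceReg(tree);
    }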
diff --git a/src/jit/emitarm.h b/src/jit/emitarm.h
index 1440148f42..1e286e8425 100644
--- a/src/jit/emitarm.h
+++ b/src/jit/emitarm.h
@@ -10,10 +10,8 @@
struct CnsVal
{
- int cnsVal;
-#ifdef RELOC_SUPPORT
+ int cnsVal;
bool cnsReloc;
-#endif
};
insSize emitInsSize(insFormat insFmt);
@@ -109,6 +107,10 @@ bool emitInsIsLoad(instruction ins);
bool emitInsIsStore(instruction ins);
bool emitInsIsLoadOrStore(instruction ins);
+// Generate code for a load or store operation and handle the case
+// of contained GT_LEA op1 with [base + index<<scale + offset]
+void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir);
+
/*****************************************************************************
*
* Convert between an index scale in bytes to a smaller encoding used for
@@ -230,6 +232,13 @@ inline static bool insOptsROR(insOpts opt)
return (opt == INS_OPTS_ROR);
}
+// Returns the number of bits used by the given 'size'.
+inline static unsigned getBitWidth(emitAttr size)
+{
+ assert(size <= EA_8BYTE);
+ return (unsigned)size * BITS_PER_BYTE;
+}
+
/************************************************************************/
/* The public entry points to output instructions */
/************************************************************************/
@@ -239,7 +248,9 @@ static bool emitIns_valid_imm_for_alu(int imm);
static bool emitIns_valid_imm_for_mov(int imm);
static bool emitIns_valid_imm_for_small_mov(regNumber reg, int imm, insFlags flags);
static bool emitIns_valid_imm_for_add(int imm, insFlags flags);
+static bool emitIns_valid_imm_for_cmp(int imm, insFlags flags);
static bool emitIns_valid_imm_for_add_sp(int imm);
+static bool emitIns_valid_imm_for_ldst_offset(int imm, emitAttr size);
void emitIns(instruction ins);
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index dd4bac808a..93994e7918 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -6697,12 +6697,8 @@ void emitter::emitIns_Call(EmitCallType callType,
{
assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
- // This call will preserve the liveness of most registers
- //
- // - On the ARM64 the NOGC helpers will preserve all registers,
- // except for those listed in the RBM_CALLEE_TRASH_NOGC mask
-
- savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC;
+ // Get the set of registers that this call kills and remove it from the saved set.
+ savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
// In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH
if (isProfLeaveCB)
@@ -6842,12 +6838,10 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idSetIsCallAddr();
}
-#if RELOC_SUPPORT
if (emitComp->opts.compReloc)
{
id->idSetIsDspReloc();
}
-#endif
}
#ifdef DEBUG
@@ -10819,18 +10813,20 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
regNumber tmpReg = genRegNumFromMask(tmpRegMask);
noway_assert(tmpReg != REG_NA);
+ emitAttr addType = varTypeIsGC(memBase) ? EA_BYREF : EA_PTRSIZE;
+
if (emitIns_valid_imm_for_add(offset, EA_8BYTE))
{
if (lsl > 0)
{
// Generate code to set tmpReg = base + index*scale
- emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ emitIns_R_R_R_I(INS_add, addType, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
INS_OPTS_LSL);
}
else // no scale
{
// Generate code to set tmpReg = base + index
- emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ emitIns_R_R_R(INS_add, addType, tmpReg, memBase->gtRegNum, index->gtRegNum);
}
noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
@@ -10845,7 +10841,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
// Then add the base register
// rd = rd + base
- emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+ emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->gtRegNum);
noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
noway_assert(tmpReg != index->gtRegNum);
diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h
index 5459a0d6c8..6a8e42b86f 100644
--- a/src/jit/emitarm64.h
+++ b/src/jit/emitarm64.h
@@ -19,9 +19,7 @@ static bool strictArmAsm;
struct CnsVal
{
ssize_t cnsVal;
-#ifdef RELOC_SUPPORT
- bool cnsReloc;
-#endif
+ bool cnsReloc;
};
#ifdef DEBUG
diff --git a/src/jit/emitinl.h b/src/jit/emitinl.h
index 302b8ea448..82ad53d341 100644
--- a/src/jit/emitinl.h
+++ b/src/jit/emitinl.h
@@ -144,9 +144,7 @@ inline int emitter::emitGetInsCDinfo(instrDesc* id)
inline void emitter::emitGetInsCns(instrDesc* id, CnsVal* cv)
{
-#ifdef RELOC_SUPPORT
cv->cnsReloc = id->idIsCnsReloc();
-#endif
if (id->idIsLargeCns())
{
cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
@@ -159,9 +157,7 @@ inline void emitter::emitGetInsCns(instrDesc* id, CnsVal* cv)
inline ssize_t emitter::emitGetInsAmdCns(instrDesc* id, CnsVal* cv)
{
-#ifdef RELOC_SUPPORT
cv->cnsReloc = id->idIsCnsReloc();
-#endif
if (id->idIsLargeDsp())
{
if (id->idIsLargeCns())
@@ -192,9 +188,7 @@ inline ssize_t emitter::emitGetInsAmdCns(instrDesc* id, CnsVal* cv)
inline void emitter::emitGetInsDcmCns(instrDesc* id, CnsVal* cv)
{
-#ifdef RELOC_SUPPORT
cv->cnsReloc = id->idIsCnsReloc();
-#endif
if (id->idIsLargeCns())
{
if (id->idIsLargeDsp())
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index be5cefbfea..b495d015d6 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -48,6 +48,15 @@ bool IsSSEOrAVXInstruction(instruction ins)
#endif // !FEATURE_AVX_SUPPORT
}
+bool IsAVXOnlyInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (ins >= INS_FIRST_AVX_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
+#else
+ return false;
+#endif
+}
+
bool emitter::IsAVXInstruction(instruction ins)
{
#ifdef FEATURE_AVX_SUPPORT
@@ -81,9 +90,11 @@ bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins)
ins == INS_minsd || ins == INS_divps || ins == INS_divpd || ins == INS_maxps || ins == INS_maxpd ||
ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb ||
ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw ||
- ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub ||
- ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps ||
- ins == INS_vinsertf128 || ins == INS_punpckldq || ins == INS_phaddd);
+ ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_insertps ||
+ ins == INS_vinsertf128 || ins == INS_punpckldq || ins == INS_phaddd || ins == INS_pminub ||
+ ins == INS_pminsw || ins == INS_pminsb || ins == INS_pminsd || ins == INS_pminuw || ins == INS_pminud ||
+ ins == INS_pmaxub || ins == INS_pmaxsw || ins == INS_pmaxsb || ins == INS_pmaxsd || ins == INS_pmaxuw ||
+ ins == INS_pmaxud);
}
// Returns true if the AVX instruction is a move operator that requires 3 operands.
@@ -106,16 +117,9 @@ bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins)
//
// Note that this should be true for any of the instructions in instrsXArch.h
// that use the SSE38 or SSE3A macro.
-//
-// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this
-// needs to be addressed by expanding instruction encodings.
bool emitter::Is4ByteAVXInstruction(instruction ins)
{
- return UseAVX() &&
- (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq ||
- ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw ||
- ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 ||
- ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd);
+ return UseAVX() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins);
}
#endif // FEATURE_AVX_SUPPORT
@@ -134,8 +138,7 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins)
// On legacy backend SSE3_4 is not enabled.
return false;
#else
- return UseSSE3_4() && (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq ||
- ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_ptest || ins == INS_phaddd);
+ return UseSSE3_4() && IsSSE4Instruction(ins) && EncodedBySSE38orSSE3A(ins);
#endif
}
@@ -739,7 +742,10 @@ void emitter::emitMarkStackLvl(unsigned stackLevel)
emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
if (emitMaxStackDepth < emitCurStackLvl)
+ {
+ JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
emitMaxStackDepth = emitCurStackLvl;
+ }
}
#endif
@@ -938,72 +944,6 @@ inline size_t insCode(instruction ins)
/*****************************************************************************
*
- * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
- */
-
-inline size_t insCodeMI(instruction ins)
-{
- // clang-format off
- const static
- size_t insCodesMI[] =
- {
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
- #include "instrs.h"
- #undef INST0
- #undef INST1
- #undef INST2
- #undef INST3
- #undef INST4
- #undef INST5
- };
- // clang-format on
-
- assert((unsigned)ins < sizeof(insCodesMI) / sizeof(insCodesMI[0]));
- assert((insCodesMI[ins] != BAD_CODE));
-
- return insCodesMI[ins];
-}
-
-/*****************************************************************************
- *
- * Returns the "reg, [r/m]" encoding of the given CPU instruction.
- */
-
-inline size_t insCodeRM(instruction ins)
-{
- // clang-format off
- const static
- size_t insCodesRM[] =
- {
- #define INST0(id, nm, fp, um, rf, wf, mr )
- #define INST1(id, nm, fp, um, rf, wf, mr )
- #define INST2(id, nm, fp, um, rf, wf, mr, mi )
- #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
- #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
- #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
- #include "instrs.h"
- #undef INST0
- #undef INST1
- #undef INST2
- #undef INST3
- #undef INST4
- #undef INST5
- };
- // clang-format on
-
- assert((unsigned)ins < sizeof(insCodesRM) / sizeof(insCodesRM[0]));
- assert((insCodesRM[ins] != BAD_CODE));
-
- return insCodesRM[ins];
-}
-
-/*****************************************************************************
- *
* Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
*/
@@ -1070,6 +1010,86 @@ inline size_t insCodeRR(instruction ins)
// clang-format off
const static
+size_t insCodesRM[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+// Returns true iff the given CPU instruction has an RM encoding.
+inline bool hasCodeRM(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesRM) / sizeof(insCodesRM[0]));
+ return ((insCodesRM[ins] != BAD_CODE));
+}
+
+/*****************************************************************************
+ *
+ * Returns the "reg, [r/m]" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeRM(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesRM) / sizeof(insCodesRM[0]));
+ assert((insCodesRM[ins] != BAD_CODE));
+
+ return insCodesRM[ins];
+}
+
+// clang-format off
+const static
+size_t insCodesMI[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+// Returns true iff the given CPU instruction has an MI encoding.
+inline bool hasCodeMI(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMI) / sizeof(insCodesMI[0]));
+ return ((insCodesMI[ins] != BAD_CODE));
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeMI(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMI) / sizeof(insCodesMI[0]));
+ assert((insCodesMI[ins] != BAD_CODE));
+
+ return insCodesMI[ins];
+}
+
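The hasCode*/insCode* pairs above all follow the same X-macro pattern: a table stamped out from instrs.h, a BAD_CODE sentinel for instructions that lack the encoding, a has* query that tests the sentinel, and an accessor that asserts it away. A toy, self-contained version of that pattern (the instruction names and codes below are invented):

#include <cassert>
#include <cstddef>

const size_t BAD_CODE = (size_t)-1;

// Stand-in for instrs.h: each entry is (name, "RM-style" code).
#define TOY_INSTRUCTIONS(OP) \
    OP(ToyAdd, 0x01)         \
    OP(ToyNop, BAD_CODE)     \
    OP(ToyMov, 0x8B)

enum ToyIns
{
#define TOY_ENUM(name, code) name,
    TOY_INSTRUCTIONS(TOY_ENUM)
#undef TOY_ENUM
    ToyInsCount
};

static const size_t toyCodes[] =
{
#define TOY_CODE(name, code) code,
    TOY_INSTRUCTIONS(TOY_CODE)
#undef TOY_CODE
};

inline bool hasToyCode(ToyIns ins) // like hasCodeRM/hasCodeMI
{
    assert((unsigned)ins < (unsigned)ToyInsCount);
    return toyCodes[ins] != BAD_CODE;
}

inline size_t toyCode(ToyIns ins) // like insCodeRM/insCodeMI
{
    assert(hasToyCode(ins));
    return toyCodes[ins];
}

int main()
{
    assert(hasToyCode(ToyAdd) && toyCode(ToyAdd) == 0x01);
    assert(!hasToyCode(ToyNop));
    return 0;
}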
+// clang-format off
+const static
size_t insCodesMR[] =
{
#define INST0(id, nm, fp, um, rf, wf, mr )
@@ -1108,6 +1128,32 @@ inline size_t insCodeMR(instruction ins)
return insCodesMR[ins];
}
+// Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h.
+bool emitter::EncodedBySSE38orSSE3A(instruction ins)
+{
+ const size_t SSE38 = 0x0F660038;
+ const size_t SSE3A = 0x0F66003A;
+ const size_t MASK = 0xFFFF00FF;
+
+ size_t insCode = 0;
+
+ if (hasCodeRM(ins))
+ {
+ insCode = insCodeRM(ins);
+ }
+ else if (hasCodeMI(ins))
+ {
+ insCode = insCodeMI(ins);
+ }
+ else if (hasCodeMR(ins))
+ {
+ insCode = insCodeMR(ins);
+ }
+
+ insCode &= MASK;
+ return insCode == SSE38 || insCode == SSE3A;
+}
+
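A quick way to see what that mask does, using a made-up packed code (the real per-instruction values come from the instrs.h tables): MASK keeps the 0x0F66 prefix/escape bytes and the low 0x38/0x3A map selector while zeroing the instruction-specific opcode byte in between. A minimal sketch:

#include <cassert>
#include <cstddef>

int main()
{
    const size_t SSE38 = 0x0F660038;
    const size_t SSE3A = 0x0F66003A;
    const size_t MASK  = 0xFFFF00FF;

    // 0x0F664238 is a hypothetical table entry, not a real encoding:
    // 0x42 stands in for the instruction-specific opcode byte that the
    // mask discards.
    size_t hypotheticalCode = 0x0F664238;

    assert((hypotheticalCode & MASK) == SSE38); // classified as a 38-map instruction
    assert((0x0F66113A & MASK) == SSE3A);       // and a 3A-map one
    return 0;
}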
/*****************************************************************************
*
* Returns an encoding for the specified register to be used in the bit0-2
@@ -1622,7 +1668,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
if (EBPbased)
{
-#if defined(_TARGET_AMD64_) && !defined(PLATFORM_UNIX)
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
// If localloc is not used, then ebp chaining is done and hence
// offset of locals will be at negative offsets, Otherwise offsets
// will be positive. In future, when RBP gets positioned in the
@@ -1756,13 +1802,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, in
valSize = sizeof(int);
}
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
assert(valSize == sizeof(int));
}
-#endif
if (valInByte)
{
@@ -1838,13 +1882,11 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
break;
}
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
dspInByte = false; // relocs can't be placed in a byte
dspIsZero = false; // relocs won't always be zero
}
-#endif
if (code & 0xFF000000)
{
@@ -2031,13 +2073,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val
valSize = sizeof(INT32);
}
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
assert(valSize == sizeof(INT32));
}
-#endif
if (valInByte)
{
@@ -2079,13 +2119,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val
valSize = sizeof(INT32);
#endif // !_TARGET_AMD64_
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
assert(valSize == sizeof(INT32));
}
-#endif
if (valInByte)
{
@@ -2315,11 +2353,8 @@ void emitter::emitIns(instruction ins)
}
#ifndef LEGACY_BACKEND
- // Account for 2-byte VEX prefix in case of vzeroupper
- if (ins == INS_vzeroupper)
- {
- sz += 2;
- }
+ // vzeroupper includes its 2-byte VEX prefix in its MR code.
+ assert((ins != INS_vzeroupper) || (sz == 3));
#endif
insFormat fmt = IF_NONE;
@@ -3623,13 +3658,11 @@ void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
-#if RELOC_SUPPORT
// Static always need relocs
if (!jitStaticFldIsGlobAddr(fldHnd))
{
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}
-#endif
UNATIVE_OFFSET sz;
instrDesc* id;
@@ -3791,13 +3824,11 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg,
*/
void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
-#if RELOC_SUPPORT
// Static always need relocs
if (!jitStaticFldIsGlobAddr(fldHnd))
{
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}
-#endif
emitAttr size = EA_SIZE(attr);
@@ -3876,13 +3907,11 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO
void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
{
-#if RELOC_SUPPORT
// Static always need relocs
if (!jitStaticFldIsGlobAddr(fldHnd))
{
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}
-#endif
emitAttr size = EA_SIZE(attr);
@@ -3951,13 +3980,11 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f
void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
{
-#if RELOC_SUPPORT
// Static always need relocs
if (!jitStaticFldIsGlobAddr(fldHnd))
{
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}
-#endif
insFormat fmt;
@@ -4039,7 +4066,6 @@ void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int v
emitTotalIGjmps++;
#endif
-#if RELOC_SUPPORT
#ifndef _TARGET_AMD64_
// Storing the address of a basicBlock will need a reloc
// as the instruction uses the absolute address,
@@ -4052,7 +4078,6 @@ void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int v
{
id->idSetIsDspReloc();
}
-#endif // RELOC_SUPPORT
id->idCodeSize(sz);
@@ -4984,7 +5009,6 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0
}
else if (ins == INS_push || ins == INS_push_hide)
{
-#if RELOC_SUPPORT
// Pushing the address of a basicBlock will need a reloc
// as the instruction uses the absolute address,
// not a relative address
@@ -4992,7 +5016,6 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0
{
id->idSetIsDspReloc();
}
-#endif
sz = PUSH_INST_SIZE;
}
else
@@ -5097,7 +5120,10 @@ void emitter::emitAdjustStackDepthPushPop(instruction ins)
emitCurStackLvl += emitCntStackDepth;
if (emitMaxStackDepth < emitCurStackLvl)
+ {
+ JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
emitMaxStackDepth = emitCurStackLvl;
+ }
}
else if (ins == INS_pop)
{
@@ -5133,7 +5159,10 @@ void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
emitCurStackLvl = newStackLvl.Value();
if (emitMaxStackDepth < emitCurStackLvl)
+ {
+ JITDUMP("Upping emitMaxStackDepth from %d to %d\n", emitMaxStackDepth, emitCurStackLvl);
emitMaxStackDepth = emitCurStackLvl;
+ }
}
else if (ins == INS_add)
{
@@ -5167,22 +5196,25 @@ void emitter::emitAdjustStackDepth(instruction ins, ssize_t val)
*
*/
+// clang-format off
void emitter::emitIns_Call(EmitCallType callType,
CORINFO_METHOD_HANDLE methHnd,
INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
- void* addr,
- ssize_t argSize,
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
- IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
- regNumber ireg, // = REG_NA
- regNumber xreg, // = REG_NA
- unsigned xmul, // = 0
- ssize_t disp, // = 0
- bool isJump, // = false
- bool isNoGC) // = false
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
+ regNumber ireg, // = REG_NA
+ regNumber xreg, // = REG_NA
+ unsigned xmul, // = 0
+ ssize_t disp, // = 0
+ bool isJump, // = false
+ bool isNoGC) // = false
+// clang-format on
{
/* Sanity check the arguments depending on callType */
@@ -5480,7 +5512,6 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idAddr()->iiaAddr = (BYTE*)addr;
sz = 6;
-#if RELOC_SUPPORT
// Since this is an indirect call through a pointer and we don't
// currently pass in emitAttr into this function, we query codegen
// whether addr needs a reloc.
@@ -5498,7 +5529,6 @@ void emitter::emitIns_Call(EmitCallType callType,
sz++;
}
#endif //_TARGET_AMD64_
-#endif // RELOC_SUPPORT
}
else
{
@@ -5518,13 +5548,11 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idSetIsCallAddr();
}
-#if RELOC_SUPPORT
// Direct call to a method and no addr indirection is needed.
if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
{
id->idSetIsDspReloc();
}
-#endif
}
#ifdef DEBUG
@@ -5979,12 +6007,10 @@ void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool rel
doffs = Compiler::eeGetJitDataOffs(fldHnd);
-#ifdef RELOC_SUPPORT
if (reloc)
{
printf("reloc ");
}
-#endif
if (doffs >= 0)
{
@@ -6200,12 +6226,10 @@ void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
if (jdsc)
{
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
printf("reloc ");
}
-#endif
printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
}
@@ -6246,7 +6270,6 @@ void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
nsep = true;
}
-#ifdef RELOC_SUPPORT
if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
{
if (nsep)
@@ -6256,7 +6279,6 @@ void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
emitDispReloc(disp);
}
else
-#endif
{
// Munge any pointers if we want diff-able disassembly
if (emitComp->opts.disDiffable)
@@ -6463,11 +6485,8 @@ void emitter::emitDispIns(
printf("IN%04x: ", idNum);
}
-#ifdef RELOC_SUPPORT
#define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
-#else
-#define ID_INFO_DSP_RELOC false
-#endif
+
/* Display a constant value if the instruction references one */
if (!isNew)
@@ -6684,13 +6703,11 @@ void emitter::emitDispIns(
// no 8-byte immediates allowed here!
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitDispReloc(val);
}
else
-#endif
{
PRINT_CONSTANT:
// Munge any pointers if we want diff-able disassembly
@@ -6812,13 +6829,11 @@ void emitter::emitDispIns(
else
{
printf(", ");
-#ifdef RELOC_SUPPORT
if (cnsVal.cnsReloc)
{
emitDispReloc(val);
}
else
-#endif
{
goto PRINT_CONSTANT;
}
@@ -6893,13 +6908,11 @@ void emitter::emitDispIns(
else
{
printf(", ");
-#ifdef RELOC_SUPPORT
if (cnsVal.cnsReloc)
{
emitDispReloc(val);
}
else
-#endif
{
goto PRINT_CONSTANT;
}
@@ -6999,13 +7012,11 @@ void emitter::emitDispIns(
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
printf(", ");
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitDispReloc(val);
}
else
-#endif
{
goto PRINT_CONSTANT;
}
@@ -7073,14 +7084,11 @@ void emitter::emitDispIns(
// no 8-byte immediates allowed here!
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
-#ifdef RELOC_SUPPORT
if (cnsVal.cnsReloc)
{
emitDispReloc(val);
}
- else
-#endif
- if (id->idInsFmt() == IF_MRW_SHF)
+ else if (id->idInsFmt() == IF_MRW_SHF)
{
emitDispShift(ins, (BYTE)val);
}
@@ -7125,13 +7133,11 @@ void emitter::emitDispIns(
case IF_RRW_CNS:
printf("%s, ", emitRegName(id->idReg1(), attr));
val = emitGetInsSC(id);
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitDispReloc(val);
}
else
-#endif
{
goto PRINT_CONSTANT;
}
@@ -7477,11 +7483,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
ssize_t cval = addc->cnsVal;
// Does the constant fit in a byte?
- if ((signed char)cval == cval &&
-#ifdef RELOC_SUPPORT
- addc->cnsReloc == false &&
-#endif
- ins != INS_mov && ins != INS_test)
+ if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
if (id->idInsFmt() != IF_ARW_SHF)
{
@@ -7626,12 +7628,10 @@ GOT_DSP:
dspInByte = ((signed char)dsp == (ssize_t)dsp);
dspIsZero = (dsp == 0);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
dspInByte = false; // relocs can't be placed in a byte
}
-#endif
// Is there a [scaled] index component?
if (rgx == REG_NA)
@@ -7725,12 +7725,10 @@ GOT_DSP:
dst += emitOutputWord(dst, code | 0x8500);
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
break;
@@ -7765,12 +7763,10 @@ GOT_DSP:
dst += emitOutputWord(dst, code | 0x8400);
dst += emitOutputByte(dst, 0x24);
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
break;
@@ -7796,12 +7792,10 @@ GOT_DSP:
{
dst += emitOutputWord(dst, code | 0x8000);
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
}
@@ -7846,12 +7840,10 @@ GOT_DSP:
dst += emitOutputWord(dst, code | 0x8400);
dst += emitOutputByte(dst, regByte);
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
}
}
@@ -7871,12 +7863,10 @@ GOT_DSP:
}
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
}
else
@@ -7904,12 +7894,10 @@ GOT_DSP:
dst += emitOutputWord(dst, code | 0x8400);
dst += emitOutputByte(dst, regByte);
dst += emitOutputLong(dst, dsp);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
}
}
@@ -7943,13 +7931,11 @@ GOT_DSP:
assert(!"unexpected operand size");
}
-#ifdef RELOC_SUPPORT
if (addc->cnsReloc)
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
assert(opsz == 4);
}
-#endif
}
DONE:
@@ -8099,11 +8085,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
ssize_t cval = addc->cnsVal;
// Does the constant fit in a byte?
- if ((signed char)cval == cval &&
-#ifdef RELOC_SUPPORT
- addc->cnsReloc == false &&
-#endif
- ins != INS_mov && ins != INS_test)
+ if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
if (id->idInsFmt() != IF_SRW_SHF)
{
@@ -8238,10 +8220,8 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
dspInByte = ((signed char)dsp == (int)dsp);
dspIsZero = (dsp == 0);
-#ifdef RELOC_SUPPORT
 // for stack variables the dsp should never be a reloc
assert(id->idIsDspReloc() == 0);
-#endif
if (EBPbased)
{
@@ -8361,13 +8341,11 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
assert(!"unexpected operand size");
}
-#ifdef RELOC_SUPPORT
if (addc->cnsReloc)
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
assert(opsz == 4);
}
-#endif
}
// Does this instruction operate on a GC ref value?
@@ -8510,11 +8488,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
{
ssize_t cval = addc->cnsVal;
// Does the constant fit in a byte?
- if ((signed char)cval == cval &&
-#ifdef RELOC_SUPPORT
- addc->cnsReloc == false &&
-#endif
- ins != INS_mov && ins != INS_test)
+ if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
{
if (id->idInsFmt() != IF_MRW_SHF)
{
@@ -8756,12 +8730,10 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
dst += emitOutputLong(dst, (int)target);
#endif //_TARGET_X86_
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
}
-#endif
}
else
{
@@ -8774,12 +8746,10 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
dst += emitOutputSizeT(dst, (ssize_t)target);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET);
}
-#endif
#endif //_TARGET_X86_
}
@@ -8811,13 +8781,11 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
default:
assert(!"unexpected operand size");
}
-#ifdef RELOC_SUPPORT
if (addc->cnsReloc)
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
assert(opsz == 4);
}
-#endif
}
// Does this instruction operate on a GC ref value?
@@ -9539,12 +9507,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
ssize_t val = emitGetInsSC(id);
bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
}
-#endif
noway_assert(emitVerifyEncodable(ins, size, reg));
@@ -9632,12 +9598,10 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
}
#endif
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
}
-#endif
goto DONE;
}
@@ -9794,13 +9758,11 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
break;
}
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
assert(size == EA_4BYTE);
}
-#endif
}
DONE:
@@ -9909,7 +9871,6 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
#endif
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
valInByte = false; // relocs can't be placed in a byte
@@ -9917,7 +9878,6 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
// Of these instructions only the push instruction can have reloc
assert(ins == INS_push || ins == INS_push_hide);
}
-#endif
switch (ins)
{
@@ -9959,12 +9919,10 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
dst += emitOutputByte(dst, code);
dst += emitOutputLong(dst, val);
-#ifdef RELOC_SUPPORT
if (id->idIsCnsReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
}
-#endif
}
// Did we push a GC ref value?
@@ -10551,12 +10509,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutputLong(dst, offset);
-#ifdef RELOC_SUPPORT
if (id->idIsDspReloc())
{
emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
}
-#endif
DONE_CALL:
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 9c435e5d87..faeba7d942 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -40,9 +40,7 @@ typedef unsigned __int64 code_t;
struct CnsVal
{
ssize_t cnsVal;
-#ifdef RELOC_SUPPORT
- bool cnsReloc;
-#endif
+ bool cnsReloc;
};
UNATIVE_OFFSET emitInsSize(code_t code);
@@ -107,6 +105,7 @@ void SetUseSSE3_4(bool value)
{
useSSE3_4Encodings = value;
}
+bool EncodedBySSE38orSSE3A(instruction ins);
bool Is4ByteSSE4Instruction(instruction ins);
bool hasRexPrefix(code_t code)
@@ -451,35 +450,41 @@ enum EmitCallType
EC_COUNT
};
+// clang-format off
void emitIns_Call(EmitCallType callType,
CORINFO_METHOD_HANDLE methHnd,
CORINFO_SIG_INFO* sigInfo, // used to report call sites to the EE
void* addr,
ssize_t argSize,
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
- GenTreeIndir* indir,
- bool isJump = false,
- bool isNoGC = false);
-
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ GenTreeIndir* indir,
+ bool isJump = false,
+ bool isNoGC = false);
+// clang-format on
+
+// clang-format off
void emitIns_Call(EmitCallType callType,
CORINFO_METHOD_HANDLE methHnd,
INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
- void* addr,
- ssize_t argSize,
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
- VARSET_VALARG_TP ptrVars,
- regMaskTP gcrefRegs,
- regMaskTP byrefRegs,
- IL_OFFSETX ilOffset = BAD_IL_OFFSET,
- regNumber ireg = REG_NA,
- regNumber xreg = REG_NA,
- unsigned xmul = 0,
- ssize_t disp = 0,
- bool isJump = false,
- bool isNoGC = false);
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ ssize_t disp = 0,
+ bool isJump = false,
+ bool isNoGC = false);
+// clang-format on
#ifdef _TARGET_AMD64_
// Is the last instruction emitted a call instruction?
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 50318b0940..3374b8c820 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -44,7 +44,7 @@ void Compiler::fgInit()
fgSlopUsedInEdgeWeights = false;
fgRangeUsedInEdgeWeights = true;
fgNeedsUpdateFlowGraph = false;
- fgCalledWeight = BB_ZERO_WEIGHT;
+ fgCalledCount = BB_ZERO_WEIGHT;
/* We haven't yet computed the dominator sets */
fgDomsComputed = false;
@@ -330,14 +330,37 @@ void Compiler::fgInstrumentMethod()
// Add the method entry callback node
- GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+ GenTreePtr arg;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd);
+ resolvedToken.tokenScope = info.compScopeHnd;
+ resolvedToken.token = currentMethodToken;
+ resolvedToken.tokenType = CORINFO_TOKENKIND_Method;
+
+ info.compCompHnd->resolveToken(&resolvedToken);
+
+ arg = impTokenToHandle(&resolvedToken);
+ }
+ else
+#endif
+ {
+ arg = gtNewIconEmbMethHndNode(info.compMethodHnd);
+ }
+
+ GenTreeArgList* args = gtNewArgList(arg);
GenTreePtr call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);
GenTreePtr handle =
gtNewIconEmbHndNode((void*)&bbProfileBufferStart->ExecutionCount, nullptr, GTF_ICON_BBC_PTR);
GenTreePtr value = gtNewOperNode(GT_IND, TYP_INT, handle);
GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, value, gtNewIconNode(0, TYP_INT));
- relop->gtFlags |= GTF_RELOP_QMARK;
+ relop->gtFlags |= GTF_RELOP_QMARK; // TODO-Cleanup: [Simple] Move this to gtNewQmarkNode
GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
GenTreePtr cond = gtNewQmarkNode(TYP_VOID, relop, colon);
stmt = gtNewStmt(cond);
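Reading the QMARK/COLON built above, the profiler enter helper is only reached while the first block's execution count is still zero, i.e. on the first pass through the method. Roughly, in standalone C++ (the helper and buffer below are illustrative stand-ins, not the real CORINFO_HELP_BBT_FCN_ENTER plumbing):

#include <cstdint>
#include <cstdio>

struct BlockCounts
{
    uint32_t ExecutionCount;
};

static BlockCounts g_profileBuffer[1]; // stand-in for bbProfileBufferStart

static void BBT_FcnEnter()             // stand-in for the enter helper
{
    std::printf("profiler enter helper called\n");
}

// Shape of the instrumented method prolog: (count != 0) ? nop : call helper.
static void InstrumentedMethodProlog()
{
    if (g_profileBuffer[0].ExecutionCount == 0)
    {
        BBT_FcnEnter();
    }
}

int main()
{
    InstrumentedMethodProlog();          // helper runs
    g_profileBuffer[0].ExecutionCount++; // counter bumped elsewhere by the real instrumentation
    InstrumentedMethodProlog();          // helper skipped now
    return 0;
}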
@@ -397,6 +420,9 @@ BasicBlock* Compiler::fgNewBasicBlock(BBjumpKinds jumpKind)
void Compiler::fgEnsureFirstBBisScratch()
{
+ // This method does not update predecessor lists and so must only be called before they are computed.
+ assert(!fgComputePredsDone);
+
// Have we already allocated a scratch block?
if (fgFirstBBisScratch())
@@ -411,10 +437,11 @@ void Compiler::fgEnsureFirstBBisScratch()
if (fgFirstBB != nullptr)
{
// If we have profile data the new block will inherit fgFirstBlock's weight
- if (fgFirstBB->bbFlags & BBF_PROF_WEIGHT)
+ if (fgFirstBB->hasProfileWeight())
{
block->inheritWeight(fgFirstBB);
}
+
fgInsertBBbefore(fgFirstBB, block);
}
else
@@ -2386,6 +2413,7 @@ void Compiler::fgComputeDoms()
bbRoot.bbNum = 0;
bbRoot.bbIDom = &bbRoot;
bbRoot.bbDfsNum = 0;
+ bbRoot.bbFlags = 0;
flRoot.flNext = nullptr;
flRoot.flBlock = &bbRoot;
@@ -2508,6 +2536,8 @@ void Compiler::fgComputeDoms()
}
}
+ fgCompDominatedByExceptionalEntryBlocks();
+
#ifdef DEBUG
if (verbose)
{
@@ -3826,19 +3856,19 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
if (GCPOLL_CALL == pollType)
{
createdPollBlocks = false;
- GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+ GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
#if GTF_CALL_REG_SAVE
- tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+ call->gtCallMoreFlags |= GTF_CALL_REG_SAVE;
#endif // GTF_CALL_REG_SAVE
// for BBJ_ALWAYS I don't need to insert it before the condition. Just append it.
if (block->bbJumpKind == BBJ_ALWAYS)
{
- fgInsertStmtAtEnd(block, tree);
+ fgInsertStmtAtEnd(block, call);
}
else
{
- GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, tree);
+ GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, call);
// For DDB156656, we need to associate the GC Poll with the IL offset (and therefore sequence
// point) of the tree before which we inserted the poll. One example of when this is a
// problem:
@@ -3907,11 +3937,11 @@ bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
bottom->bbJumpDest = top->bbJumpDest;
// 2) Add a GC_CALL node to Poll.
- GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+ GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
#if GTF_CALL_REG_SAVE
- tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+ call->gtCallMoreFlags |= GTF_CALL_REG_SAVE;
#endif // GTF_CALL_REG_SAVE
- fgInsertStmtAtEnd(poll, tree);
+ fgInsertStmtAtEnd(poll, call);
// 3) Remove the last statement from Top and add it to Bottom.
if (oldJumpKind != BBJ_ALWAYS)
@@ -4248,7 +4278,7 @@ private:
// jumpTarget[N] is set to a JT_* value if IL offset N is a
// jump target in the method.
//
-// Also sets lvAddrExposed and lvArgWrite in lvaTable[].
+// Also sets lvAddrExposed, lvHasILStoreOp and lvHasMultipleILStoreOp in lvaTable[].
#ifdef _PREFAST_
#pragma warning(push)
@@ -4512,20 +4542,80 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE*
}
varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
- varNum = compMapILargNum(varNum); // account for possible hidden param
- // This check is only intended to prevent an AV. Bad varNum values will later
- // be handled properly by the verifier.
- if (varNum < lvaTableCnt)
+ if (isInlining)
{
- if (isInlining)
+ if (varNum < impInlineInfo->argCnt)
{
impInlineInfo->inlArgInfo[varNum].argHasStargOp = true;
}
+ }
+ else
+ {
+ // account for possible hidden param
+ varNum = compMapILargNum(varNum);
+
+ // This check is only intended to prevent an AV. Bad varNum values will later
+ // be handled properly by the verifier.
+ if (varNum < lvaTableCnt)
+ {
+ // In non-inline cases, note written-to arguments.
+ lvaTable[varNum].lvHasILStoreOp = 1;
+ }
+ }
+ }
+ break;
+
+ case CEE_STLOC_0:
+ case CEE_STLOC_1:
+ case CEE_STLOC_2:
+ case CEE_STLOC_3:
+ varNum = (opcode - CEE_STLOC_0);
+ goto STLOC;
+
+ case CEE_STLOC:
+ case CEE_STLOC_S:
+ {
+ noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+ STLOC:
+ if (isInlining)
+ {
+ InlLclVarInfo& lclInfo = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt];
+
+ if (lclInfo.lclHasStlocOp)
+ {
+ lclInfo.lclHasMultipleStlocOp = 1;
+ }
else
{
+ lclInfo.lclHasStlocOp = 1;
+ }
+ }
+ else
+ {
+ varNum += info.compArgsCount;
+
+ // This check is only intended to prevent an AV. Bad varNum values will later
+ // be handled properly by the verifier.
+ if (varNum < lvaTableCnt)
+ {
// In non-inline cases, note written-to locals.
- lvaTable[varNum].lvArgWrite = 1;
+ if (lvaTable[varNum].lvHasILStoreOp)
+ {
+ lvaTable[varNum].lvHasMultipleILStoreOp = 1;
+ }
+ else
+ {
+ lvaTable[varNum].lvHasILStoreOp = 1;
+ }
}
}
}
@@ -4847,11 +4937,11 @@ void Compiler::fgAdjustForAddressExposedOrWrittenThis()
// Optionally enable adjustment during stress.
if (!tiVerificationNeeded && compStressCompile(STRESS_GENERIC_VARN, 15))
{
- lvaTable[info.compThisArg].lvArgWrite = true;
+ lvaTable[info.compThisArg].lvHasILStoreOp = true;
}
// If this is exposed or written to, create a temp for the modifiable this
- if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvArgWrite)
+ if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvHasILStoreOp)
{
// If there is a "ldarga 0" or "starg 0", grab and use the temp.
lvaArg0Var = lvaGrabTemp(false DEBUGARG("Address-exposed, or written this pointer"));
@@ -4865,14 +4955,14 @@ void Compiler::fgAdjustForAddressExposedOrWrittenThis()
lvaTable[lvaArg0Var].lvLclFieldExpr = lvaTable[info.compThisArg].lvLclFieldExpr;
lvaTable[lvaArg0Var].lvLiveAcrossUCall = lvaTable[info.compThisArg].lvLiveAcrossUCall;
#endif
- lvaTable[lvaArg0Var].lvArgWrite = lvaTable[info.compThisArg].lvArgWrite;
- lvaTable[lvaArg0Var].lvVerTypeInfo = lvaTable[info.compThisArg].lvVerTypeInfo;
+ lvaTable[lvaArg0Var].lvHasILStoreOp = lvaTable[info.compThisArg].lvHasILStoreOp;
+ lvaTable[lvaArg0Var].lvVerTypeInfo = lvaTable[info.compThisArg].lvVerTypeInfo;
// Clear the TI_FLAG_THIS_PTR in the original 'this' pointer.
noway_assert(lvaTable[lvaArg0Var].lvVerTypeInfo.IsThisPtr());
lvaTable[info.compThisArg].lvVerTypeInfo.ClearThisPtr();
- lvaTable[info.compThisArg].lvAddrExposed = false;
- lvaTable[info.compThisArg].lvArgWrite = false;
+ lvaTable[info.compThisArg].lvAddrExposed = false;
+ lvaTable[info.compThisArg].lvHasILStoreOp = false;
}
}
@@ -5779,11 +5869,12 @@ void Compiler::fgFindBasicBlocks()
compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount;
info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount;
- // Use a spill temp for the return value if there are multiple return blocks.
- if ((info.compRetNativeType != TYP_VOID) && (retBlocks > 1))
+ // Use a spill temp for the return value if there are multiple return blocks,
+ // or if the inlinee has GC ref locals.
+ if ((info.compRetNativeType != TYP_VOID) && ((retBlocks > 1) || impInlineInfo->HasGcRefLocals()))
{
 // The lifetime of this var might span multiple BBs. So it is a long lifetime compiler temp.
- lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp"));
+ lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline return value spill temp"));
lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
}
@@ -6696,9 +6787,7 @@ bool Compiler::fgIsThrow(GenTreePtr tree)
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VERIFICATION)) ||
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
-#if COR_JIT_EE_VERSION > 460
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
-#endif // COR_JIT_EE_VERSION
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROW)) ||
(tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RETHROW)))
{
@@ -6824,7 +6913,7 @@ GenTreePtr Compiler::fgIsIndirOfAddrOfLocal(GenTreePtr tree)
return res;
}
-GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
+GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
{
bool bNeedClassID = true;
unsigned callFlags = 0;
@@ -6934,7 +7023,7 @@ GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHe
return gtNewHelperCallNode(helper, type, callFlags, argList);
}
-GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
+GenTreeCall* Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
{
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
@@ -7032,137 +7121,156 @@ bool Compiler::fgAddrCouldBeNull(GenTreePtr addr)
* Optimize the call to the delegate constructor.
*/
-GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd)
+GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreeCall* call,
+ CORINFO_CONTEXT_HANDLE* ExactContextHnd,
+ CORINFO_RESOLVED_TOKEN* ldftnToken)
{
- noway_assert(call->gtOper == GT_CALL);
-
- noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
- CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getMethodClass(methHnd);
- GenTreePtr targetMethod = call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ GenTreePtr targetMethod = call->gtCallArgs->Rest()->Current();
noway_assert(targetMethod->TypeGet() == TYP_I_IMPL);
- genTreeOps oper = targetMethod->OperGet();
- if (oper == GT_FTN_ADDR || oper == GT_CALL || oper == GT_QMARK)
+ genTreeOps oper = targetMethod->OperGet();
+ CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
+ GenTreePtr qmarkNode = nullptr;
+ if (oper == GT_FTN_ADDR)
{
- CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
- GenTreePtr qmarkNode = nullptr;
- if (oper == GT_FTN_ADDR)
- {
- targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
- }
- else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
- {
- GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp1;
+ targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
+ }
+ else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
+ {
+ GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->Rest()->Rest()->Current();
- if (handleNode->OperGet() == GT_CNS_INT)
- {
- // it's a ldvirtftn case, fetch the methodhandle off the helper for ldvirtftn. It's the 3rd arg
- targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
- }
- // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
- // up as a GT_QMARK.
- else if (handleNode->OperGet() == GT_QMARK)
- {
- qmarkNode = handleNode;
- }
- }
- // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
- // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
- else if (oper == GT_QMARK)
+ if (handleNode->OperGet() == GT_CNS_INT)
{
- qmarkNode = targetMethod;
+ // it's a ldvirtftn case, fetch the methodhandle off the helper for ldvirtftn. It's the 3rd arg
+ targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
}
- if (qmarkNode)
+ // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
+ // up as a GT_QMARK.
+ else if (handleNode->OperGet() == GT_QMARK)
{
- noway_assert(qmarkNode->OperGet() == GT_QMARK);
- // The argument is actually a generic dictionary lookup. For delegate creation it looks
- // like:
- // GT_QMARK
- // GT_COLON
- // op1 -> call
- // Arg 1 -> token (has compile time handle)
- // op2 -> lclvar
- //
- //
- // In this case I can find the token (which is a method handle) and that is the compile time
- // handle.
- noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
- noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
- GenTreePtr runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1;
-
- // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
- GenTreePtr tokenNode = runtimeLookupCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
- noway_assert(tokenNode->OperGet() == GT_CNS_INT);
- targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+ qmarkNode = handleNode;
}
+ }
+ // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
+ // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
+ else if (oper == GT_QMARK)
+ {
+ qmarkNode = targetMethod;
+ }
+ if (qmarkNode)
+ {
+ noway_assert(qmarkNode->OperGet() == GT_QMARK);
+ // The argument is actually a generic dictionary lookup. For delegate creation it looks
+ // like:
+ // GT_QMARK
+ // GT_COLON
+ // op1 -> call
+ // Arg 1 -> token (has compile time handle)
+ // op2 -> lclvar
+ //
+ //
+ // In this case I can find the token (which is a method handle) and that is the compile time
+ // handle.
+ noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
+ noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
+ GenTreeCall* runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1->AsCall();
+
+ // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
+ GenTreePtr tokenNode = runtimeLookupCall->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ noway_assert(tokenNode->OperGet() == GT_CNS_INT);
+ targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+ }
#ifdef FEATURE_READYTORUN_COMPILER
- if (opts.IsReadyToRun())
+ if (opts.IsReadyToRun())
+ {
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
{
- // ReadyToRun has this optimization for a non-virtual function pointers only for now.
- if (oper == GT_FTN_ADDR)
+ if (ldftnToken != nullptr)
{
- // The first argument of the helper is delegate this pointer
- GenTreeArgList* helperArgs = gtNewArgList(call->gtCall.gtCallObjp);
+ GenTreePtr thisPointer = call->gtCallObjp;
+ GenTreePtr targetObjPointers = call->gtCallArgs->Current();
+ GenTreeArgList* helperArgs = nullptr;
+ CORINFO_LOOKUP pLookup;
CORINFO_CONST_LOOKUP entryPoint;
-
- // The second argument of the helper is the target object pointers
- helperArgs->gtOp.gtOp2 = gtNewArgList(call->gtCall.gtCallArgs->gtOp.gtOp1);
-
+ info.compCompHnd->getReadyToRunDelegateCtorHelper(ldftnToken, clsHnd, &pLookup);
+ if (!pLookup.lookupKind.needsRuntimeLookup)
+ {
+ helperArgs = gtNewArgList(thisPointer, targetObjPointers);
+ entryPoint = pLookup.constLookup;
+ }
+ else
+ {
+ assert(oper != GT_FTN_ADDR);
+ CORINFO_CONST_LOOKUP genericLookup;
+ info.compCompHnd->getReadyToRunHelper(ldftnToken, &pLookup.lookupKind,
+ CORINFO_HELP_READYTORUN_GENERIC_HANDLE, &genericLookup);
+ GenTreePtr ctxTree = getRuntimeContextTree(pLookup.lookupKind.runtimeLookupKind);
+ helperArgs = gtNewArgList(thisPointer, targetObjPointers, ctxTree);
+ entryPoint = genericLookup;
+ }
call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);
-#if COR_JIT_EE_VERSION > 460
- info.compCompHnd->getReadyToRunDelegateCtorHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken, clsHnd,
- &entryPoint);
-#else
- info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
- CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &entryPoint);
-#endif
- call->gtCall.setEntryPoint(entryPoint);
+ call->setEntryPoint(entryPoint);
}
}
- else
-#endif
- if (targetMethodHnd != nullptr)
+ // ReadyToRun has this optimization for non-virtual function pointers only for now.
+ else if (oper == GT_FTN_ADDR)
{
- CORINFO_METHOD_HANDLE alternateCtor = nullptr;
- DelegateCtorArgs ctorData;
- ctorData.pMethod = info.compMethodHnd;
- ctorData.pArg3 = nullptr;
- ctorData.pArg4 = nullptr;
- ctorData.pArg5 = nullptr;
+ GenTreePtr thisPointer = call->gtCallObjp;
+ GenTreePtr targetObjPointers = call->gtCallArgs->Current();
+ GenTreeArgList* helperArgs = gtNewArgList(thisPointer, targetObjPointers);
- alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
- if (alternateCtor != methHnd)
- {
- // we erase any inline info that may have been set for generics has it is not needed here,
- // and in fact it will pass the wrong info to the inliner code
- *ExactContextHnd = nullptr;
+ call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);
- call->gtCall.gtCallMethHnd = alternateCtor;
+ CORINFO_LOOKUP entryPoint;
+ info.compCompHnd->getReadyToRunDelegateCtorHelper(ldftnToken, clsHnd, &entryPoint);
+ assert(!entryPoint.lookupKind.needsRuntimeLookup);
+ call->setEntryPoint(entryPoint.constLookup);
+ }
+ }
+ else
+#endif
+ if (targetMethodHnd != nullptr)
+ {
+ CORINFO_METHOD_HANDLE alternateCtor = nullptr;
+ DelegateCtorArgs ctorData;
+ ctorData.pMethod = info.compMethodHnd;
+ ctorData.pArg3 = nullptr;
+ ctorData.pArg4 = nullptr;
+ ctorData.pArg5 = nullptr;
- noway_assert(call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 == nullptr);
- if (ctorData.pArg3)
- {
- call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 =
- gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR));
+ alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
+ if (alternateCtor != methHnd)
+ {
+ // we erase any inline info that may have been set for generics as it is not needed here,
+ // and in fact it will pass the wrong info to the inliner code
+ *ExactContextHnd = nullptr;
- if (ctorData.pArg4)
- {
- call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
- gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR));
+ call->gtCallMethHnd = alternateCtor;
- if (ctorData.pArg5)
- {
- call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
- gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR));
- }
- }
- }
+ noway_assert(call->gtCallArgs->Rest()->Rest() == nullptr);
+ GenTreeArgList* addArgs = nullptr;
+ if (ctorData.pArg5)
+ {
+ GenTreePtr arg5 = gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR);
+ addArgs = gtNewListNode(arg5, addArgs);
+ }
+ if (ctorData.pArg4)
+ {
+ GenTreePtr arg4 = gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR);
+ addArgs = gtNewListNode(arg4, addArgs);
+ }
+ if (ctorData.pArg3)
+ {
+ GenTreePtr arg3 = gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR);
+ addArgs = gtNewListNode(arg3, addArgs);
}
+ call->gtCallArgs->Rest()->Rest() = addArgs;
}
}
-
return call;
}
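The rewritten tail above relies on gtNewListNode(arg, rest) consing arg onto the front of rest, so prepending pArg5, then pArg4, then pArg3 yields a list that reads pArg3 -> pArg4 -> pArg5 without the old nested gtOp.gtOp2 assignments. A tiny sketch of that prepend-in-reverse idiom (the node type is invented for illustration; leaks ignored):

#include <cassert>

struct ArgNode // stand-in for GenTreeArgList
{
    int      value;
    ArgNode* rest;
};

static ArgNode* Prepend(int value, ArgNode* rest) // like gtNewListNode(arg, addArgs)
{
    return new ArgNode{value, rest};
}

int main()
{
    ArgNode* addArgs = nullptr;
    addArgs = Prepend(5, addArgs); // pArg5 first...
    addArgs = Prepend(4, addArgs);
    addArgs = Prepend(3, addArgs); // ...pArg3 last, so it ends up at the head

    assert(addArgs->value == 3);
    assert(addArgs->rest->value == 4);
    assert(addArgs->rest->rest->value == 5);
    assert(addArgs->rest->rest->rest == nullptr);
    return 0;
}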
@@ -7478,7 +7586,7 @@ GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
 // Collectible types require that for shared generic code, if we use the generic context parameter
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
// context parameter is this that we don't need the eager reporting logic.)
- lvaGenericsContextUsed = true;
+ lvaGenericsContextUseCount++;
switch (kind.runtimeLookupKind)
{
@@ -7522,7 +7630,7 @@ GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
return tree;
}
-#if !defined(_TARGET_X86_)
+#if FEATURE_EH_FUNCLETS
/*****************************************************************************
*
@@ -7614,8 +7722,15 @@ void Compiler::fgAddSyncMethodEnterExit()
assert(fgFirstBB->bbFallsThrough());
BasicBlock* tryBegBB = fgNewBBafter(BBJ_NONE, fgFirstBB, false);
+ BasicBlock* tryNextBB = tryBegBB->bbNext;
BasicBlock* tryLastBB = fgLastBB;
+ // If we have profile data the new block will inherit the next block's weight
+ if (tryNextBB->hasProfileWeight())
+ {
+ tryBegBB->inheritWeight(tryNextBB);
+ }
+
// Create a block for the fault.
assert(!tryLastBB->bbFallsThrough());
@@ -7890,7 +8005,7 @@ void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
#endif
}
-#endif // !_TARGET_X86_
+#endif // FEATURE_EH_FUNCLETS
//------------------------------------------------------------------------
// fgAddReversePInvokeEnterExit: Add enter/exit calls for reverse PInvoke methods
@@ -7905,7 +8020,6 @@ void Compiler::fgAddReversePInvokeEnterExit()
{
assert(opts.IsReversePInvoke());
-#if COR_JIT_EE_VERSION > 460
lvaReversePInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Reverse Pinvoke FrameVar"));
LclVarDsc* varDsc = &lvaTable[lvaReversePInvokeFrameVar];
@@ -7952,8 +8066,6 @@ void Compiler::fgAddReversePInvokeEnterExit()
printf("\n");
}
#endif
-
-#endif // COR_JIT_EE_VERSION > 460
}
/*****************************************************************************
@@ -7989,6 +8101,16 @@ void Compiler::fgAddInternal()
{
noway_assert(!compIsForInlining());
+#ifndef LEGACY_BACKEND
+ // The RyuJIT backend requires a scratch BB into which it can safely insert a P/Invoke method prolog if one is
+ // required. Create it here.
+ if (info.compCallUnmanaged != 0)
+ {
+ fgEnsureFirstBBisScratch();
+ fgFirstBB->bbFlags |= BBF_DONT_REMOVE;
+ }
+#endif // !LEGACY_BACKEND
+
/*
<BUGNUM> VSW441487 </BUGNUM>
@@ -8017,8 +8139,8 @@ void Compiler::fgAddInternal()
lva0CopiedForGenericsCtxt = false;
#endif // JIT32_GCENCODER
noway_assert(lva0CopiedForGenericsCtxt || !lvaTable[info.compThisArg].lvAddrExposed);
- noway_assert(!lvaTable[info.compThisArg].lvArgWrite);
- noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite ||
+ noway_assert(!lvaTable[info.compThisArg].lvHasILStoreOp);
+ noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvHasILStoreOp ||
lva0CopiedForGenericsCtxt);
var_types thisType = lvaTable[info.compThisArg].TypeGet();
@@ -8107,7 +8229,7 @@ void Compiler::fgAddInternal()
// If all BBJ_RETURN blocks have a valid profiled weights
// then allProfWeight will be true, else it is false
//
- if ((block->bbFlags & BBF_PROF_WEIGHT) == 0)
+ if (!block->hasProfileWeight())
{
allProfWeight = false;
}
@@ -8144,7 +8266,7 @@ void Compiler::fgAddInternal()
}
}
-#if !defined(_TARGET_X86_)
+#if FEATURE_EH_FUNCLETS
// Add the synchronized method enter/exit calls and try/finally protection. Note
// that this must happen before the one BBJ_RETURN block is created below, so the
// BBJ_RETURN block gets placed at the top-level, not within an EH region. (Otherwise,
@@ -8154,7 +8276,7 @@ void Compiler::fgAddInternal()
{
fgAddSyncMethodEnterExit();
}
-#endif // !_TARGET_X86_
+#endif // FEATURE_EH_FUNCLETS
if (oneReturn)
{
@@ -8373,7 +8495,7 @@ void Compiler::fgAddInternal()
#endif
}
-#if defined(_TARGET_X86_)
+#if !FEATURE_EH_FUNCLETS
/* Is this a 'synchronized' method? */
@@ -8449,7 +8571,7 @@ void Compiler::fgAddInternal()
syncEndEmitCookie = NULL;
}
-#endif // _TARGET_X86_
+#endif // !FEATURE_EH_FUNCLETS
/* Do we need to do runtime call out to check the security? */
@@ -8937,12 +9059,29 @@ void Compiler::fgFindOperOrder()
}
}
-/*****************************************************************************/
+//------------------------------------------------------------------------
+// fgSimpleLowering: do full walk of all IR, lowering selected operations
+// and computing lvaOutgoingArgumentAreaSize.
+//
+// Notes:
+// Lowers GT_ARR_LENGTH, GT_ARR_BOUNDS_CHECK, and GT_SIMD_CHK.
+//
+// For target ABIs with fixed out args area, computes upper bound on
+// the size of this area from the calls in the IR.
+//
+// Outgoing arg area size is computed here because we want to run it
+// after optimization (in case calls are removed) and need to look at
+// all possible calls in the method.
+
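As an aside, here is a minimal standalone sketch of what the GT_ARR_LENGTH lowering described above produces: the length read becomes an ordinary indirection at a fixed offset from the array reference. The struct layout and names below are hypothetical stand-ins for CORINFO_Array, not the JIT's actual types.

    #include <cstddef>
    #include <cstdint>

    // Hypothetical header layout standing in for CORINFO_Array: the length
    // field sits at a fixed offset from the array reference.
    struct ArrayHeader
    {
        void*    methodTable;
        uint32_t length;
    };

    // What the lowered IR computes: GT_ARR_LENGTH becomes GT_IND(array_addr + ArrLenOffs).
    uint32_t LoweredArrLength(const ArrayHeader* arr)
    {
        const size_t   arrLenOffs = offsetof(ArrayHeader, length);
        const uint8_t* base       = reinterpret_cast<const uint8_t*>(arr);
        return *reinterpret_cast<const uint32_t*>(base + arrLenOffs);
    }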
void Compiler::fgSimpleLowering()
{
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned outgoingArgSpaceSize = 0;
+#endif // FEATURE_FIXED_OUT_ARGS
+
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
- // Walk the statement trees in this basic block, converting ArrLength nodes.
+ // Walk the statement trees in this basic block.
compCurBB = block; // Used in fgRngChkTarget.
#ifdef LEGACY_BACKEND
@@ -8956,74 +9095,155 @@ void Compiler::fgSimpleLowering()
{
{
#endif
- if (tree->gtOper == GT_ARR_LENGTH)
+
+ switch (tree->OperGet())
{
- GenTreeArrLen* arrLen = tree->AsArrLen();
- GenTreePtr arr = arrLen->gtArrLen.ArrRef();
- GenTreePtr add;
- GenTreePtr con;
+ case GT_ARR_LENGTH:
+ {
+ GenTreeArrLen* arrLen = tree->AsArrLen();
+ GenTreePtr arr = arrLen->gtArrLen.ArrRef();
+ GenTreePtr add;
+ GenTreePtr con;
- /* Create the expression "*(array_addr + ArrLenOffs)" */
+ /* Create the expression "*(array_addr + ArrLenOffs)" */
- noway_assert(arr->gtNext == tree);
+ noway_assert(arr->gtNext == tree);
- noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
- arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));
+ noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
+ arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));
- if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
- {
- // If the array is NULL, then we should get a NULL reference
- // exception when computing its length. We need to maintain
- // an invariant where there is no sum of two constants node, so
- // let's simply return an indirection of NULL.
+ if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
+ {
+ // If the array is NULL, then we should get a NULL reference
+ // exception when computing its length. We need to maintain
+ // an invariant where there is no sum of two constants node, so
+ // let's simply return an indirection of NULL.
- add = arr;
- }
- else
- {
- con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
- con->gtRsvdRegs = 0;
+ add = arr;
+ }
+ else
+ {
+ con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
+ con->gtRsvdRegs = 0;
- add = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
- add->gtRsvdRegs = arr->gtRsvdRegs;
+ add = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
+ add->gtRsvdRegs = arr->gtRsvdRegs;
#ifdef LEGACY_BACKEND
- con->gtCopyFPlvl(arr);
+ con->gtCopyFPlvl(arr);
- add->gtCopyFPlvl(arr);
- add->CopyCosts(arr);
+ add->gtCopyFPlvl(arr);
+ add->CopyCosts(arr);
- arr->gtNext = con;
- con->gtPrev = arr;
+ arr->gtNext = con;
+ con->gtPrev = arr;
- con->gtNext = add;
- add->gtPrev = con;
+ con->gtNext = add;
+ add->gtPrev = con;
- add->gtNext = tree;
- tree->gtPrev = add;
+ add->gtNext = tree;
+ tree->gtPrev = add;
#else
- range.InsertAfter(arr, con, add);
+ range.InsertAfter(arr, con, add);
#endif
- }
+ }
- // Change to a GT_IND.
- tree->ChangeOperUnchecked(GT_IND);
+ // Change to a GT_IND.
+ tree->ChangeOperUnchecked(GT_IND);
- tree->gtOp.gtOp1 = add;
- }
- else if (tree->OperGet() == GT_ARR_BOUNDS_CHECK
+ tree->gtOp.gtOp1 = add;
+ break;
+ }
+
+ case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
- || tree->OperGet() == GT_SIMD_CHK
+ case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- )
- {
- // Add in a call to an error routine.
- fgSetRngChkTarget(tree, false);
+ {
+ // Add in a call to an error routine.
+ fgSetRngChkTarget(tree, false);
+ break;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ // Fast tail calls use the caller-supplied scratch
+ // space so have no impact on this method's outgoing arg size.
+ if (!call->IsFastTailCall())
+ {
+ // Update outgoing arg size to handle this call
+ const unsigned thisCallOutAreaSize = call->fgArgInfo->GetOutArgSize();
+ assert(thisCallOutAreaSize >= MIN_ARG_AREA_FOR_CALL);
+
+ if (thisCallOutAreaSize > outgoingArgSpaceSize)
+ {
+ outgoingArgSpaceSize = thisCallOutAreaSize;
+ JITDUMP("Bumping outgoingArgSpaceSize to %u for call [%06d]\n", outgoingArgSpaceSize,
+ dspTreeID(tree));
+ }
+ else
+ {
+ JITDUMP("outgoingArgSpaceSize %u sufficient for call [%06d], which needs %u\n",
+ outgoingArgSpaceSize, dspTreeID(tree), thisCallOutAreaSize);
+ }
+ }
+ else
+ {
+ JITDUMP("outgoingArgSpaceSize not impacted by fast tail call [%06d]\n", dspTreeID(tree));
+ }
+ break;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ default:
+ {
+ // No other operators need processing.
+ break;
+ }
}
- }
+ } // foreach gtNext
+ } // foreach Stmt
+ } // foreach BB
+
+#if FEATURE_FIXED_OUT_ARGS
+ // Finish computing the outgoing args area size
+ //
+ // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
+ // 1. there are calls to THROW_HELPER methods.
+ // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
+ // that even methods without any calls will have outgoing arg area space allocated.
+ //
+ // An example of these two cases is Windows Amd64, where the ABI requires 4 slots for
+ // the outgoing arg space if the method makes any calls.
+ if (outgoingArgSpaceSize < MIN_ARG_AREA_FOR_CALL)
+ {
+ if (compUsesThrowHelper || compIsProfilerHookNeeded())
+ {
+ outgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
+ JITDUMP("Bumping outgoingArgSpaceSize to %u for throw helper or profile hook", outgoingArgSpaceSize);
}
}
+ // If a function has localloc, we will need to move the outgoing arg space when the
+ // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+ // leaving alignment-related holes when doing this move, make sure the outgoing
+ // argument space size is a multiple of the stack alignment by aligning up to the next
+ // stack alignment boundary.
+ if (compLocallocUsed)
+ {
+ outgoingArgSpaceSize = (unsigned)roundUp(outgoingArgSpaceSize, STACK_ALIGN);
+ JITDUMP("Bumping outgoingArgSpaceSize to %u for localloc", outgoingArgSpaceSize);
+ }
+
+ // Publish the final value and mark it as read only so any update
+ // attempt later will cause an assert.
+ lvaOutgoingArgSpaceSize = outgoingArgSpaceSize;
+ lvaOutgoingArgSpaceSize.MarkAsReadOnly();
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
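To summarize the sizing rules above, here is a minimal sketch under the assumption of a Windows x64-like ABI with a 32-byte minimum arg area and 16-byte stack alignment; the function name and parameters are hypothetical and not part of the JIT.

    #include <algorithm>
    #include <vector>

    // Hypothetical helper mirroring the rules above: take the max outgoing arg
    // size over all non-fast-tail calls, bump to the ABI minimum when throw
    // helpers or profiler hooks are used, and round up for localloc.
    unsigned ComputeOutgoingArgSpace(const std::vector<unsigned>& callOutArgSizes,
                                     bool usesThrowHelperOrProfilerHook,
                                     bool usesLocalloc)
    {
        const unsigned minArgArea = 32; // e.g. 4 register-home slots on Windows x64
        const unsigned stackAlign = 16;

        unsigned size = 0;
        for (unsigned s : callOutArgSizes)
        {
            size = std::max(size, s); // upper bound over the calls left after optimization
        }
        if ((size < minArgArea) && usesThrowHelperOrProfilerHook)
        {
            size = minArgArea; // throw helpers / profiler hooks still need the minimum area
        }
        if (usesLocalloc)
        {
            size = (size + stackAlign - 1) & ~(stackAlign - 1); // keep a multiple of STACK_ALIGN
        }
        return size;
    }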
#ifdef DEBUG
if (verbose && fgRngChkThrowAdded)
{
@@ -9695,8 +9915,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
// or if both block and bNext have non-zero weights
// then we select the highest weight block.
- if ((block->bbFlags & BBF_PROF_WEIGHT) || (bNext->bbFlags & BBF_PROF_WEIGHT) ||
- (block->bbWeight && bNext->bbWeight))
+ if (block->hasProfileWeight() || bNext->hasProfileWeight() || (block->bbWeight && bNext->bbWeight))
{
// We are keeping block so update its fields
// when bNext has a greater weight
@@ -11001,7 +11220,7 @@ bool Compiler::fgExpandRarelyRunBlocks()
NEW_RARELY_RUN:
/* If the weight of the block was obtained from a profile run,
than it's more accurate than our static analysis */
- if (bPrev->bbFlags & BBF_PROF_WEIGHT)
+ if (bPrev->hasProfileWeight())
{
continue;
}
@@ -11187,10 +11406,10 @@ bool Compiler::fgExpandRarelyRunBlocks()
// if bPrev->bbWeight is not based upon profile data we can adjust
// the weights of bPrev and block
//
- else if (bPrev->isBBCallAlwaysPair() && // we must have a BBJ_CALLFINALLY and BBK_ALWAYS pair
- (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
- ((bPrev->bbFlags & BBF_PROF_WEIGHT) == 0)) // and the BBJ_CALLFINALLY block is not using profiled
- // weights
+ else if (bPrev->isBBCallAlwaysPair() && // we must have a BBJ_CALLFINALLY and BBK_ALWAYS pair
+ (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
+ !bPrev->hasProfileWeight()) // and the BBJ_CALLFINALLY block is not using profiled
+ // weights
{
if (block->isRunRarely())
{
@@ -12126,7 +12345,8 @@ bool Compiler::fgRelocateEHRegions()
}
// Currently it is not good to move the rarely run handler regions to the end of the method
- // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot
+ // section.
CLANG_FORMAT_COMMENT_ANCHOR;
#if 0
@@ -12356,7 +12576,7 @@ void Compiler::fgPrintEdgeWeights()
if (edge->flEdgeWeightMin < BB_MAX_WEIGHT)
{
- printf("(%s", refCntWtd2str(edge->flEdgeWeightMin));
+ printf("(%u", edge->flEdgeWeightMin);
}
else
{
@@ -12366,7 +12586,7 @@ void Compiler::fgPrintEdgeWeights()
{
if (edge->flEdgeWeightMax < BB_MAX_WEIGHT)
{
- printf("..%s", refCntWtd2str(edge->flEdgeWeightMax));
+ printf("..%u", edge->flEdgeWeightMax);
}
else
{
@@ -12429,7 +12649,7 @@ void Compiler::fgComputeEdgeWeights()
}
#endif // DEBUG
fgHaveValidEdgeWeights = false;
- fgCalledWeight = BB_UNITY_WEIGHT;
+ fgCalledCount = BB_UNITY_WEIGHT;
}
#if DEBUG
@@ -12468,7 +12688,7 @@ void Compiler::fgComputeEdgeWeights()
for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
{
- if (((bDst->bbFlags & BBF_PROF_WEIGHT) == 0) && (bDst->bbPreds != nullptr))
+ if (!bDst->hasProfileWeight() && (bDst->bbPreds != nullptr))
{
BasicBlock* bOnlyNext;
@@ -12495,7 +12715,7 @@ void Compiler::fgComputeEdgeWeights()
bOnlyNext = nullptr;
}
- if ((bOnlyNext == bDst) && ((bSrc->bbFlags & BBF_PROF_WEIGHT) != 0))
+ if ((bOnlyNext == bDst) && bSrc->hasProfileWeight())
{
// We know the exact weight of bDst
newWeight = bSrc->bbWeight;
@@ -12547,8 +12767,7 @@ void Compiler::fgComputeEdgeWeights()
// Sum up the weights of all of the return blocks and throw blocks
// This is used when we have a back-edge into block 1
//
- if (((bDst->bbFlags & BBF_PROF_WEIGHT) != 0) &&
- ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
+ if (bDst->hasProfileWeight() && ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
{
returnWeight += bDst->bbWeight;
}
@@ -12568,25 +12787,57 @@ void Compiler::fgComputeEdgeWeights()
}
#endif
- // When we are not using profile data we have already setup fgCalledWeight
+ // When we are not using profile data we have already setup fgCalledCount
// only set it here if we are using profile data
//
if (fgIsUsingProfileWeights())
{
- // If the first block has one ref then it's weight is the fgCalledWeight
- // otherwise we have backedge's into the first block so instead
- // we use the sum of the return block weights.
- // If the profile data has a 0 for the returnWeoght
- // then just use the first block weight rather than the 0
+ BasicBlock* firstILBlock = fgFirstBB; // The first block for IL code (i.e. for the IL code at offset 0)
+
+ // Do we have an internal block as our first Block?
+ if (firstILBlock->bbFlags & BBF_INTERNAL)
+ {
+ // Skip past any/all BBF_INTERNAL blocks that may have been added before the first real IL block.
+ //
+ while (firstILBlock->bbFlags & BBF_INTERNAL)
+ {
+ firstILBlock = firstILBlock->bbNext;
+ }
+ // The 'firstILBlock' is now expected to have a profile-derived weight
+ assert(firstILBlock->hasProfileWeight());
+ }
+
+ // If the first block only has one ref then we use its weight for fgCalledCount.
+ // Otherwise we have backedges into the first block, so instead we use the sum
+ // of the return block weights for fgCalledCount.
+ //
+ // If the profile data has a 0 for the returnWeight
+ // (i.e. the function never returns because it always throws)
+ // then just use the first block weight rather than 0.
//
- if ((fgFirstBB->countOfInEdges() == 1) || (returnWeight == 0))
+ if ((firstILBlock->countOfInEdges() == 1) || (returnWeight == 0))
{
- fgCalledWeight = fgFirstBB->bbWeight;
+ assert(firstILBlock->hasProfileWeight()); // This should always be a profile-derived weight
+ fgCalledCount = firstILBlock->bbWeight;
}
else
{
- fgCalledWeight = returnWeight;
+ fgCalledCount = returnWeight;
}
+
+ // If we allocated a scratch block as the first BB then we need
+ // to set its profile-derived weight to be fgCalledCount
+ if (fgFirstBBisScratch())
+ {
+ fgFirstBB->setBBProfileWeight(fgCalledCount);
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("We are using the Profile Weights and fgCalledCount is %d.\n", fgCalledCount);
+ }
+#endif
}
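The fgCalledCount selection above can be restated as a small sketch; the function and parameter names here are illustrative only, not JIT code.

    #include <cstdint>

    // With profile data, the call count is the entry block's weight unless
    // back-edges target the entry, in which case the summed return/throw
    // weight is used (falling back to the entry weight when the method never
    // returns, i.e. returnWeight is zero).
    uint64_t DeriveCalledCount(uint64_t firstILBlockWeight, unsigned firstILBlockInEdges, uint64_t returnWeight)
    {
        if ((firstILBlockInEdges == 1) || (returnWeight == 0))
        {
            return firstILBlockWeight;
        }
        return returnWeight;
    }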
// Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values
@@ -12599,7 +12850,7 @@ void Compiler::fgComputeEdgeWeights()
//
if (bDst == fgFirstBB)
{
- bDstWeight -= fgCalledWeight;
+ bDstWeight -= fgCalledCount;
}
for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
@@ -12616,7 +12867,7 @@ void Compiler::fgComputeEdgeWeights()
// then we must reset any values that they currently have
//
- if (((bSrc->bbFlags & BBF_PROF_WEIGHT) == 0) || ((bDst->bbFlags & BBF_PROF_WEIGHT) == 0))
+ if (!bSrc->hasProfileWeight() || !bDst->hasProfileWeight())
{
edge->flEdgeWeightMin = BB_ZERO_WEIGHT;
edge->flEdgeWeightMax = BB_MAX_WEIGHT;
@@ -12764,7 +13015,7 @@ void Compiler::fgComputeEdgeWeights()
//
if (bDst == fgFirstBB)
{
- bDstWeight -= fgCalledWeight;
+ bDstWeight -= fgCalledCount;
}
UINT64 minEdgeWeightSum = 0;
@@ -13000,7 +13251,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
// When we optimize a branch to branch we need to update the profile weight
// of bDest by subtracting out the block/edge weight of the path that is being optimized.
//
- if (fgHaveValidEdgeWeights && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ if (fgHaveValidEdgeWeights && bDest->hasProfileWeight())
{
flowList* edge1 = fgGetPredForBlock(bDest, block);
noway_assert(edge1 != nullptr);
@@ -13333,7 +13584,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
// When we optimize a branch to branch we need to update the profile weight
// of bDest by subtracting out the block/edge weight of the path that is being optimized.
//
- if (fgIsUsingProfileWeights() && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ if (fgIsUsingProfileWeights() && bDest->hasProfileWeight())
{
if (fgHaveValidEdgeWeights)
{
@@ -13718,10 +13969,9 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock*
// add an unconditional block after this block to jump to the target block's fallthrough block
BasicBlock* next = fgNewBBafter(BBJ_ALWAYS, block, true);
- next->bbFlags = block->bbFlags | BBF_INTERNAL;
- next->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL | BBF_JMP_TARGET |
- BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);
+ // The new block 'next' will inherit its weight from 'block'
+ next->inheritWeight(block);
next->bbJumpDest = target->bbNext;
target->bbNext->bbFlags |= BBF_JMP_TARGET;
fgAddRefPred(next, block);
@@ -14410,8 +14660,7 @@ void Compiler::fgReorderBlocks()
BasicBlock::weight_t profHotWeight = -1;
- if ((bPrev->bbFlags & BBF_PROF_WEIGHT) && (block->bbFlags & BBF_PROF_WEIGHT) &&
- ((bDest == nullptr) || (bDest->bbFlags & BBF_PROF_WEIGHT)))
+ if (bPrev->hasProfileWeight() && block->hasProfileWeight() && ((bDest == nullptr) || bDest->hasProfileWeight()))
{
//
// All blocks have profile information
@@ -17407,12 +17656,10 @@ unsigned Compiler::acdHelper(SpecialCodeKind codeKind)
{
case SCK_RNGCHK_FAIL:
return CORINFO_HELP_RNGCHKFAIL;
-#if COR_JIT_EE_VERSION > 460
case SCK_ARG_EXCPN:
return CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
case SCK_ARG_RNG_EXCPN:
return CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
-#endif // COR_JIT_EE_VERSION
case SCK_DIV_BY_ZERO:
return CORINFO_HELP_THROWDIVZERO;
case SCK_ARITH_EXCPN:
@@ -17472,10 +17719,28 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
// this restriction could be removed with more careful code
// generation for BBJ_THROW (i.e. range check failed).
//
+ // For Linux/x86, we possibly need to insert stack alignment adjustment
+ // before the first stack argument pushed for every call. But we
+ // don't know what the stack alignment adjustment will be when
+ // we morph a tree that calls fgAddCodeRef(), so the stack depth
+ // number will be incorrect. For now, simply force all functions with
+ // these helpers to have EBP frames. It might be possible to make
+ // this less conservative. E.g., for top-level (not nested) calls
+ // without stack args, the stack pointer hasn't changed and stack
+ // depth will be known to be zero. Or, figure out a way to update
+ // or generate all required helpers after all stack alignment
+ // has been added, and the stack level at each call to fgAddCodeRef()
+ // is known, or can be recalculated.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(UNIX_X86_ABI)
+ codeGen->setFrameRequired(true);
+#else // !defined(UNIX_X86_ABI)
if (add->acdStkLvl != stkDepth)
{
codeGen->setFrameRequired(true);
}
+#endif // !defined(UNIX_X86_ABI)
#endif // _TARGET_X86_
return add->acdDstBlk;
@@ -17539,14 +17804,12 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
case SCK_OVERFLOW:
msg = " for OVERFLOW";
break;
-#if COR_JIT_EE_VERSION > 460
case SCK_ARG_EXCPN:
msg = " for ARG_EXCPN";
break;
case SCK_ARG_RNG_EXCPN:
msg = " for ARG_RNG_EXCPN";
break;
-#endif // COR_JIT_EE_VERSION
default:
msg = " for ??";
break;
@@ -17593,7 +17856,6 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
noway_assert(SCK_OVERFLOW == SCK_ARITH_EXCPN);
break;
-#if COR_JIT_EE_VERSION > 460
case SCK_ARG_EXCPN:
helper = CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
break;
@@ -17601,7 +17863,6 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special
case SCK_ARG_RNG_EXCPN:
helper = CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
break;
-#endif // COR_JIT_EE_VERSION
// case SCK_PAUSE_EXEC:
// noway_assert(!"add code to pause exec");
@@ -18819,7 +19080,7 @@ FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR typ
if (wcscmp(filename, W("profiled")) == 0)
{
- if ((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0)
+ if (fgFirstBB->hasProfileWeight())
{
createDuplicateFgxFiles = true;
goto ONE_FILE_PER_METHOD;
@@ -19009,7 +19270,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
return false;
}
bool validWeights = fgHaveValidEdgeWeights;
- unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
+ unsigned calledCount = max(fgCalledCount, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
double weightDivisor = (double)(calledCount * BB_UNITY_WEIGHT);
const char* escapedString;
const char* regionString = "NONE";
@@ -19124,7 +19385,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
{
fprintf(fgxFile, "\n inHandler=\"%s\"", "true");
}
- if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) && ((block->bbFlags & BBF_COLD) == 0))
+ if ((fgFirstBB->hasProfileWeight()) && ((block->bbFlags & BBF_COLD) == 0))
{
fprintf(fgxFile, "\n hot=\"true\"");
}
@@ -19397,8 +19658,28 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
}
else
{
- printf("%6s", refCntWtd2str(block->getBBWeight(this)));
+ BasicBlock::weight_t weight = block->getBBWeight(this);
+
+ if (weight > 99999) // Is it going to be more than 6 characters?
+ {
+ if (weight <= 99999 * BB_UNITY_WEIGHT)
+ {
+ // print weight in this format ddddd.
+ printf("%5u.", (weight + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+ }
+ else // print weight in terms of k (i.e. 156k )
+ {
+ // print weight in this format dddddk
+ BasicBlock::weight_t weightK = weight / 1000;
+ printf("%5uk", (weightK + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+ }
+ }
+ else // print weight in this format ddd.dd
+ {
+ printf("%6s", refCntWtd2str(weight));
+ }
}
+ printf(" ");
//
// Display optional IBC weight column.
@@ -19407,7 +19688,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
if (ibcColWidth > 0)
{
- if (block->bbFlags & BBF_PROF_WEIGHT)
+ if (block->hasProfileWeight())
{
printf("%*u", ibcColWidth, block->bbWeight);
}
@@ -19661,7 +19942,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
int ibcColWidth = 0;
for (block = firstBlock; block != nullptr; block = block->bbNext)
{
- if (block->bbFlags & BBF_PROF_WEIGHT)
+ if (block->hasProfileWeight())
{
int thisIbcWidth = CountDigits(block->bbWeight);
ibcColWidth = max(ibcColWidth, thisIbcWidth);
@@ -19686,11 +19967,11 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
// clang-format off
printf("\n");
- printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ printf("------%*s-------------------------------------%*s-----------------------%*s----------------------------------------\n",
padWidth, "------------",
ibcColWidth, "------------",
maxBlockNumWidth, "----");
- printf("BBnum %*sdescAddr ref try hnd %s weight %*s%s [IL range] [jump]%*s [EH region] [flags]\n",
+ printf("BBnum %*sdescAddr ref try hnd %s weight %*s%s [IL range] [jump]%*s [EH region] [flags]\n",
padWidth, "",
fgCheapPredsValid ? "cheap preds" :
(fgComputePredsDone ? "preds "
@@ -19700,7 +19981,7 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
: ""),
maxBlockNumWidth, ""
);
- printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ printf("------%*s-------------------------------------%*s-----------------------%*s----------------------------------------\n",
padWidth, "------------",
ibcColWidth, "------------",
maxBlockNumWidth, "----");
@@ -19724,16 +20005,16 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
if (block == fgFirstColdBlock)
{
- printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~"
- "~~~~~~~~~~~~~~~\n",
+ printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~"
+ "~~~~~~~~~~~~~~~~\n",
padWidth, "~~~~~~~~~~~~", ibcColWidth, "~~~~~~~~~~~~", maxBlockNumWidth, "~~~~");
}
#if FEATURE_EH_FUNCLETS
if (block == fgFirstFuncletBB)
{
- printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s+++++++++++++++++++++++++"
- "+++++++++++++++ funclets follow\n",
+ printf("++++++%*s+++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++"
+ "++++++++++++++++ funclets follow\n",
padWidth, "++++++++++++", ibcColWidth, "++++++++++++", maxBlockNumWidth, "++++");
}
#endif // FEATURE_EH_FUNCLETS
@@ -19746,8 +20027,8 @@ void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock,
}
}
- printf("------%*s------------------------------------%*s-----------------------%*s---------------------------------"
- "-------\n",
+ printf("------%*s-------------------------------------%*s-----------------------%*s--------------------------------"
+ "--------\n",
padWidth, "------------", ibcColWidth, "------------", maxBlockNumWidth, "----");
if (dumpTrees)
@@ -20283,10 +20564,11 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef
// Should never expose the address of arg 0 or write to arg 0.
// In addition, lvArg0Var should remain 0 if arg0 is not
// written to or address-exposed.
- noway_assert(compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvArgWrite &&
- (lvaArg0Var == info.compThisArg ||
- lvaArg0Var != info.compThisArg && (lvaTable[lvaArg0Var].lvAddrExposed ||
- lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)));
+ noway_assert(
+ compThisArgAddrExposedOK && !lvaTable[info.compThisArg].lvHasILStoreOp &&
+ (lvaArg0Var == info.compThisArg ||
+ lvaArg0Var != info.compThisArg &&
+ (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvHasILStoreOp || copiedForGenericsCtxt)));
}
}
@@ -20496,7 +20778,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
{
- switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
+ switch (eeGetHelperNum(call->gtCallMethHnd))
{
// Is this a helper call that can throw an exception ?
case CORINFO_HELP_LDIV:
@@ -21048,6 +21330,7 @@ void Compiler::fgInline()
}
// See if we need to replace the return value place holder.
+ // Also, see if this update enables further devirtualization.
fgWalkTreePre(&stmt->gtStmtExpr, fgUpdateInlineReturnExpressionPlaceHolder, (void*)this);
// See if stmt is of the form GT_COMMA(call, nop)
@@ -21319,11 +21602,46 @@ void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, COR
#endif // FEATURE_MULTIREG_RET
-/*****************************************************************************
- * Callback to replace the inline return expression place holder (GT_RET_EXPR)
- */
+//------------------------------------------------------------------------
+// fgUpdateInlineReturnExpressionPlaceHolder: callback to replace the
+// inline return expression placeholder.
+//
+// Arguments:
+// pTree -- pointer to tree to examine for updates
+// data -- context data for the tree walk
+//
+// Returns:
+// fgWalkResult indicating the walk should continue; that
+// is we wish to fully explore the tree.
+//
+// Notes:
+// Looks for GT_RET_EXPR nodes that arose from tree splitting done
+// during importation for inline candidates, and replaces them.
+//
+// For successful inlines, substitutes the return value expression
+// from the inline body for the GT_RET_EXPR.
+//
+// For failed inlines, rejoins the original call into the tree from
+// whence it was split during importation.
+//
+// The code doesn't actually know if the corresponding inline
+// succeeded or not; it relies on the fact that gtInlineCandidate
+// initially points back at the call and is modified in place to
+// the inlinee return expression if the inline is successful (see
+// tail end of fgInsertInlineeBlocks for the update of iciCall).
+//
+// If the parent of the GT_RET_EXPR is a virtual call,
+// devirtualization is attempted. This should only succeed in the
+// successful inline case, when the inlinee's return value
+// expression provides a better type than the return type of the
+// method. Note for failed inlines, the devirtualizer can only go
+// by the return type, and any devirtualization that type enabled
+// would have already happened during importation.
+//
+// If the return type is a struct type and we're on a platform
+// where structs can be returned in multiple registers, ensure the
+// call has a suitable parent.
-/* static */
Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree, fgWalkData* data)
{
GenTreePtr tree = *pTree;
@@ -21369,6 +21687,41 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
}
#endif // DEBUG
} while (tree->gtOper == GT_RET_EXPR);
+
+ // Now see if this return value expression feeds the 'this'
+ // object at a virtual call site.
+ //
+ // Note for void returns where the inline failed, the
+ // GT_RET_EXPR may be top-level.
+ //
+ // May miss cases where there are intermediaries between call
+ // and this, e.g. commas.
+ GenTreePtr parentTree = data->parent;
+
+ if ((parentTree != nullptr) && (parentTree->gtOper == GT_CALL))
+ {
+ GenTreeCall* call = parentTree->AsCall();
+ bool tryLateDevirt = call->IsVirtual() && (call->gtCallObjp == tree);
+
+#ifdef DEBUG
+ tryLateDevirt = tryLateDevirt && (JitConfig.JitEnableLateDevirtualization() == 1);
+#endif // DEBUG
+
+ if (tryLateDevirt)
+ {
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("**** Late devirt opportunity\n");
+ comp->gtDispTree(call);
+ }
+#endif // DEBUG
+
+ CORINFO_CALL_INFO x = {};
+ x.hMethod = call->gtCallMethHnd;
+ comp->impDevirtualizeCall(call, tree, &x, nullptr);
+ }
+ }
}
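For readers unfamiliar with devirtualization, here is a toy C++ analogue of what the late attempt above can buy; the classes are made up, and the JIT of course operates on IR, not source.

    struct Animal
    {
        virtual int Speak() const { return 0; }
        virtual ~Animal() = default;
    };

    struct Dog : Animal
    {
        int Speak() const override { return 1; }
    };

    // Before devirtualization: an indirect call through the vtable.
    int CallSpeak(const Animal& a)
    {
        return a.Speak();
    }

    // After devirtualization: once the exact type is known (here because the
    // parameter is a Dog), the call binds directly and can itself be inlined.
    int CallSpeakDirect(const Dog& d)
    {
        return d.Dog::Speak(); // qualified call, no vtable lookup
    }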
#if FEATURE_MULTIREG_RET
@@ -21784,7 +22137,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
}
#endif // DEBUG
- // Append statements to unpin, if necessary.
+ // Append statements to null out gc ref locals, if necessary.
fgInlineAppendStatements(pInlineInfo, iciBlock, stmtAfter);
goto _Done;
@@ -21954,7 +22307,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
//
fgBBcount += InlineeCompiler->fgBBcount;
- // Append statements to unpin if necessary.
+ // Append statements to null out gc ref locals, if necessary.
fgInlineAppendStatements(pInlineInfo, bottomBlock, nullptr);
#ifdef DEBUG
@@ -22009,7 +22362,7 @@ _Done:
// If there is non-NULL return, replace the GT_CALL with its return value expression,
// so later it will be picked up by the GT_RET_EXPR node.
- if ((pInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID) || (iciCall->gtCall.gtReturnType == TYP_STRUCT))
+ if ((pInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID) || (iciCall->gtReturnType == TYP_STRUCT))
{
noway_assert(pInlineInfo->retExpr);
#ifdef DEBUG
@@ -22062,7 +22415,7 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
GenTreeStmt* postStmt = callStmt->gtNextStmt;
GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
GenTreePtr newStmt = nullptr;
- GenTreePtr call = inlineInfo->iciCall;
+ GenTreeCall* call = inlineInfo->iciCall->AsCall();
noway_assert(call->gtOper == GT_CALL);
@@ -22115,9 +22468,13 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
for (unsigned argNum = 0; argNum < inlineInfo->argCnt; argNum++)
{
- if (inlArgInfo[argNum].argHasTmp)
+ const InlArgInfo& argInfo = inlArgInfo[argNum];
+ const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp;
+ GenTree* const argNode = inlArgInfo[argNum].argNode;
+
+ if (argInfo.argHasTmp)
{
- noway_assert(inlArgInfo[argNum].argIsUsed);
+ noway_assert(argInfo.argIsUsed);
/* argBashTmpNode is non-NULL iff the argument's value was
referenced exactly once by the original IL. This offers an
@@ -22131,27 +22488,29 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
once) but the optimization cannot be applied.
*/
- GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode;
+ GenTreePtr argSingleUseNode = argInfo.argBashTmpNode;
- if (argSingleUseNode && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) &&
- !inlArgInfo[argNum].argHasLdargaOp && !inlArgInfo[argNum].argHasStargOp)
+ if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && argIsSingleDef)
{
// Change the temp in-place to the actual argument.
// We currently do not support this for struct arguments, so it must not be a GT_OBJ.
- GenTree* argNode = inlArgInfo[argNum].argNode;
assert(argNode->gtOper != GT_OBJ);
argSingleUseNode->CopyFrom(argNode, this);
continue;
}
else
{
- /* Create the temp assignment for this argument */
+ // We're going to assign the argument value to the
+ // temp we use for it in the inline body.
+ const unsigned tmpNum = argInfo.argTmpNum;
+ const var_types argType = lclVarInfo[argNum].lclTypeInfo;
+ // Create the temp assignment for this argument
CORINFO_CLASS_HANDLE structHnd = DUMMY_INIT(0);
- if (varTypeIsStruct(lclVarInfo[argNum].lclTypeInfo))
+ if (varTypeIsStruct(argType))
{
- structHnd = gtGetStructHandleIfPresent(inlArgInfo[argNum].argNode);
+ structHnd = gtGetStructHandleIfPresent(argNode);
noway_assert(structHnd != NO_CLASS_HANDLE);
}
@@ -22159,8 +22518,16 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
// argTmpNum here since in-linee compiler instance
// would have iterated over these and marked them
// accordingly.
- impAssignTempGen(inlArgInfo[argNum].argTmpNum, inlArgInfo[argNum].argNode, structHnd,
- (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset, block);
+ impAssignTempGen(tmpNum, argNode, structHnd, (unsigned)CHECK_SPILL_NONE, &afterStmt, callILOffset,
+ block);
+
+ // If we know the argument's value can't be
+ // changed within the method body, try and improve
+ // the type of the temp.
+ if (argIsSingleDef && (argType == TYP_REF))
+ {
+ lvaUpdateClass(tmpNum, argNode);
+ }
#ifdef DEBUG
if (verbose)
@@ -22170,44 +22537,42 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
#endif // DEBUG
}
}
- else if (inlArgInfo[argNum].argIsByRefToStructLocal)
+ else if (argInfo.argIsByRefToStructLocal)
{
- // Do nothing.
+ // Do nothing. Arg was directly substituted as we read
+ // the inlinee.
}
else
{
/* The argument is either not used or a const or lcl var */
- noway_assert(!inlArgInfo[argNum].argIsUsed || inlArgInfo[argNum].argIsInvariant ||
- inlArgInfo[argNum].argIsLclVar);
+ noway_assert(!argInfo.argIsUsed || argInfo.argIsInvariant || argInfo.argIsLclVar);
/* Make sure we didnt change argNode's along the way, or else
subsequent uses of the arg would have worked with the bashed value */
- if (inlArgInfo[argNum].argIsInvariant)
+ if (argInfo.argIsInvariant)
{
- assert(inlArgInfo[argNum].argNode->OperIsConst() || inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
+ assert(argNode->OperIsConst() || argNode->gtOper == GT_ADDR);
}
- noway_assert((inlArgInfo[argNum].argIsLclVar == 0) ==
- (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR ||
- (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
+ noway_assert((argInfo.argIsLclVar == 0) ==
+ (argNode->gtOper != GT_LCL_VAR || (argNode->gtFlags & GTF_GLOB_REF)));
/* If the argument has side effects, append it */
- if (inlArgInfo[argNum].argHasSideEff)
+ if (argInfo.argHasSideEff)
{
- noway_assert(inlArgInfo[argNum].argIsUsed == false);
+ noway_assert(argInfo.argIsUsed == false);
- if (inlArgInfo[argNum].argNode->gtOper == GT_OBJ ||
- inlArgInfo[argNum].argNode->gtOper == GT_MKREFANY)
+ if (argNode->gtOper == GT_OBJ || argNode->gtOper == GT_MKREFANY)
{
// Don't put GT_OBJ node under a GT_COMMA.
// Codegen can't deal with it.
// Just hang the address here in case there are side-effect.
- newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode->gtOp.gtOp1), callILOffset);
+ newStmt = gtNewStmt(gtUnusedValNode(argNode->gtOp.gtOp1), callILOffset);
}
else
{
- newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode), callILOffset);
+ newStmt = gtNewStmt(gtUnusedValNode(argNode), callILOffset);
}
afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
@@ -22336,45 +22701,103 @@ GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
// inlineInfo - information about the inline
// block - basic block for the new statements
// stmtAfter - (optional) insertion point for mid-block cases
+//
+// Notes:
+// If the call we're inlining is in tail position then
+// we skip nulling the locals, since it can interfere
+// with tail calls introduced by the local.
void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* block, GenTreePtr stmtAfter)
{
- // Null out any inline pinned locals
- if (!inlineInfo->hasPinnedLocals)
+ // If this inlinee was passed a runtime lookup generic context and
+ // ignores it, we can decrement the "generic context was used" ref
+ // count, because we created a new lookup tree and incremented the
+ // count when we imported the type parameter argument to pass to
+ // the inlinee. See corresponding logic in impImportCall that
+ // checks the sig for CORINFO_CALLCONV_PARAMTYPE.
+ //
+ // Does this method require a context (type) parameter?
+ if ((inlineInfo->inlineCandidateInfo->methInfo.args.callConv & CORINFO_CALLCONV_PARAMTYPE) != 0)
{
- // No pins, nothing to do
+ // Did the computation of that parameter require the
+ // caller to perform a runtime lookup?
+ if (inlineInfo->inlineCandidateInfo->exactContextNeedsRuntimeLookup)
+ {
+ // Fetch the temp for the generic context as it would
+ // appear in the inlinee's body.
+ const unsigned typeCtxtArg = inlineInfo->typeContextArg;
+ const unsigned tmpNum = inlineInfo->lclTmpNum[typeCtxtArg];
+
+ // Was it used in the inline body?
+ if (tmpNum == BAD_VAR_NUM)
+ {
+ // No -- so the associated runtime lookup is not needed
+ // and also no longer provides evidence that the generic
+ // context should be kept alive.
+ JITDUMP("Inlinee ignores runtime lookup generics context\n");
+ assert(lvaGenericsContextUseCount > 0);
+ lvaGenericsContextUseCount--;
+ }
+ }
+ }
+
+ // Null out any gc ref locals
+ if (!inlineInfo->HasGcRefLocals())
+ {
+ // No ref locals, nothing to do.
+ JITDUMP("fgInlineAppendStatements: no gc ref inline locals.\n");
return;
}
- JITDUMP("Unpin inlinee locals:\n");
+ if (inlineInfo->iciCall->IsImplicitTailCall())
+ {
+ JITDUMP("fgInlineAppendStatements: implicit tail call; skipping nulling.\n");
+ return;
+ }
+
+ JITDUMP("fgInlineAppendStatements: nulling out gc ref inlinee locals.\n");
GenTreePtr callStmt = inlineInfo->iciStmt;
IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx;
CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo;
- unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
+ const unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
+ unsigned gcRefLclCnt = inlineInfo->numberOfGcRefLocals;
+ const unsigned argCnt = inlineInfo->argCnt;
noway_assert(callStmt->gtOper == GT_STMT);
for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++)
{
- unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+ // Is the local a gc ref type? Need to look at the
+ // inline info for this since we will not have local
+ // temps for unused inlinee locals.
+ const var_types lclTyp = lclVarInfo[argCnt + lclNum].lclTypeInfo;
- // Is the local used at all?
- if (tmpNum == BAD_VAR_NUM)
+ if (!varTypeIsGC(lclTyp))
{
- // Nope, nothing to unpin.
+ // Nope, nothing to null out.
continue;
}
- // Is the local pinned?
- if (!lvaTable[tmpNum].lvPinned)
+ // Ensure we're examining just the right number of locals.
+ assert(gcRefLclCnt > 0);
+ gcRefLclCnt--;
+
+ // Fetch the temp for this inline local
+ const unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+
+ // Is the local used at all?
+ if (tmpNum == BAD_VAR_NUM)
{
- // Nope, nothing to unpin.
+ // Nope, nothing to null out.
continue;
}
- // Does the local we're about to unpin appear in the return
+ // Local was used, make sure the type is consistent.
+ assert(lvaTable[tmpNum].lvType == lclTyp);
+
+ // Does the local we're about to null out appear in the return
// expression? If so we somehow messed up and didn't properly
// spill the return value. See impInlineFetchLocal.
GenTreePtr retExpr = inlineInfo->retExpr;
@@ -22384,29 +22807,29 @@ void Compiler::fgInlineAppendStatements(InlineInfo* inlineInfo, BasicBlock* bloc
noway_assert(!interferesWithReturn);
}
- // Emit the unpin, by assigning null to the local.
- var_types lclTyp = (var_types)lvaTable[tmpNum].lvType;
- noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo);
- noway_assert(!varTypeIsStruct(lclTyp));
- GenTreePtr unpinExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(genActualType(lclTyp)));
- GenTreePtr unpinStmt = gtNewStmt(unpinExpr, callILOffset);
+ // Assign null to the local.
+ GenTreePtr nullExpr = gtNewTempAssign(tmpNum, gtNewZeroConNode(lclTyp));
+ GenTreePtr nullStmt = gtNewStmt(nullExpr, callILOffset);
if (stmtAfter == nullptr)
{
- stmtAfter = fgInsertStmtAtBeg(block, unpinStmt);
+ stmtAfter = fgInsertStmtAtBeg(block, nullStmt);
}
else
{
- stmtAfter = fgInsertStmtAfter(block, stmtAfter, unpinStmt);
+ stmtAfter = fgInsertStmtAfter(block, stmtAfter, nullStmt);
}
#ifdef DEBUG
if (verbose)
{
- gtDispTree(unpinStmt);
+ gtDispTree(nullStmt);
}
#endif // DEBUG
}
+
+ // There should not be any GC ref locals left to null out.
+ assert(gcRefLclCnt == 0);
}
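A hypothetical source-level picture of the effect (the types and helpers below are stand-ins, not JIT code): after inlining, the inlinee's ref-typed local survives as a caller temp, and the appended null store is what ends its reported lifetime early.

    #include <memory>

    struct Object
    {
        char payload[1024];
    };

    void Use(const std::shared_ptr<Object>&) {}

    void Caller()
    {
        // shared_ptr stands in for a GC-tracked reference.
        std::shared_ptr<Object> inlineeTmp = std::make_shared<Object>(); // was a GC-ref local of the inlinee
        Use(inlineeTmp);
        inlineeTmp = nullptr; // the appended "null out gc ref local" statement releases the object here
        // ... remainder of the caller; without the null store the object would
        // stay reachable (and reported to the GC) until Caller returns.
    }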
/*****************************************************************************/
@@ -22512,6 +22935,14 @@ void Compiler::fgRemoveEmptyFinally()
{
JITDUMP("\n*************** In fgRemoveEmptyFinally()\n");
+#if FEATURE_EH_FUNCLETS
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
if (compHndBBtabCount == 0)
{
JITDUMP("No EH in this method, nothing to remove.\n");
@@ -22741,6 +23172,14 @@ void Compiler::fgRemoveEmptyTry()
{
JITDUMP("\n*************** In fgRemoveEmptyTry()\n");
+#if FEATURE_EH_FUNCLETS
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
#ifdef FEATURE_CORECLR
bool enableRemoveEmptyTry = true;
#else
@@ -22969,6 +23408,7 @@ void Compiler::fgRemoveEmptyTry()
// Handler index of any nested blocks will update when we
// remove the EH table entry. Change handler exits to jump to
// the continuation. Clear catch type on handler entry.
+ // Decrement nesting level of enclosed GT_END_LFINs.
for (BasicBlock* block = firstHandlerBlock; block != endHandlerBlock; block = block->bbNext)
{
if (block == firstHandlerBlock)
@@ -22995,8 +23435,25 @@ void Compiler::fgRemoveEmptyTry()
fgRemoveStmt(block, finallyRet);
block->bbJumpKind = BBJ_ALWAYS;
block->bbJumpDest = continuation;
+ fgAddRefPred(continuation, block);
+ }
+ }
+
+#if !FEATURE_EH_FUNCLETS
+ // If we're in a non-funclet model, decrement the nesting
+ // level of any GT_END_LFIN we find in the handler region,
+ // since we're removing the enclosing handler.
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr->gtOper == GT_END_LFIN)
+ {
+ const unsigned nestLevel = expr->gtVal.gtVal1;
+ assert(nestLevel > 0);
+ expr->gtVal.gtVal1 = nestLevel - 1;
}
}
+#endif // !FEATURE_EH_FUNCLETS
}
// (6) Remove the try-finally EH region. This will compact the
@@ -23060,6 +23517,14 @@ void Compiler::fgCloneFinally()
{
JITDUMP("\n*************** In fgCloneFinally()\n");
+#if FEATURE_EH_FUNCLETS
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
#ifdef FEATURE_CORECLR
bool enableCloning = true;
#else
@@ -23234,7 +23699,7 @@ void Compiler::fgCloneFinally()
BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
assert(firstTryBlock->getTryIndex() == XTnum);
- assert(lastTryBlock->getTryIndex() == XTnum);
+ assert(bbInTryRegions(XTnum, lastTryBlock));
BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev;
BasicBlock* normalCallFinallyBlock = nullptr;
@@ -23564,7 +24029,7 @@ void Compiler::fgCloneFinally()
BasicBlock* firstClonedBlock = blockMap[firstBlock];
firstClonedBlock->bbCatchTyp = BBCT_NONE;
- // Cleanup the contination
+ // Cleanup the continuation
fgCleanupContinuation(normalCallFinallyReturn);
// Todo -- mark cloned blocks as a cloned finally....
@@ -23873,6 +24338,291 @@ void Compiler::fgUpdateFinallyTargetFlags()
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
}
+//------------------------------------------------------------------------
+// fgMergeFinallyChains: tail merge finally invocations
+//
+// Notes:
+//
+// Looks for common suffixes in chains of finally invocations
+// (callfinallys) and merges them. These typically arise from
+// try-finallys where there are multiple exit points in the try
+// that have the same target.
+
+void Compiler::fgMergeFinallyChains()
+{
+ JITDUMP("\n*************** In fgMergeFinallyChains()\n");
+
+#if FEATURE_EH_FUNCLETS
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, nothing to merge.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no merging.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no merging.\n");
+ return;
+ }
+
+ bool enableMergeFinallyChains = true;
+
+#if !FEATURE_EH_FUNCLETS
+ // For non-funclet models (x86) the callfinallys may contain
+ // statements and the continuations contain GT_END_LFINs. So no
+ // merging is possible until the GT_END_LFIN blocks can be merged
+ // and merging is not safe unless the callfinally blocks are split.
+ JITDUMP("EH using non-funclet model; merging not yet implemented.\n");
+ enableMergeFinallyChains = false;
+#endif // !FEATURE_EH_FUNCLETS
+
+#if !FEATURE_EH_CALLFINALLY_THUNKS
+ // For non-thunk EH models (arm32) the callfinallys may contain
+ // statements, and merging is not safe unless the callfinally
+ // blocks are split.
+ JITDUMP("EH using non-callfinally thunk model; merging not yet implemented.\n");
+ enableMergeFinallyChains = false;
+#endif
+
+ if (!enableMergeFinallyChains)
+ {
+ JITDUMP("fgMergeFinallyChains disabled\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgMergeFinallyChains()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Look for finallys.
+ bool hasFinally = false;
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Check if this is a try/finally.
+ if (HBtab->HasFinallyHandler())
+ {
+ hasFinally = true;
+ break;
+ }
+ }
+
+ if (!hasFinally)
+ {
+ JITDUMP("Method does not have any try-finallys; no merging.\n");
+ return;
+ }
+
+ // Process finallys from outside in, merging as we go. This gives
+ // us the desired bottom-up tail merge order for callfinally
+ // chains: outer merges may enable inner merges.
+ bool canMerge = false;
+ bool didMerge = false;
+ BlockToBlockMap continuationMap(getAllocator());
+
+ // Note XTnum is signed here so we can count down.
+ for (int XTnum = compHndBBtabCount - 1; XTnum >= 0; XTnum--)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Screen out non-finallys
+ if (!HBtab->HasFinallyHandler())
+ {
+ continue;
+ }
+
+ JITDUMP("Examining callfinallys for EH#%d.\n", XTnum);
+
+ // Find all the callfinallys that invoke this finally.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ // Clear out any stale entries in the continuation map
+ continuationMap.RemoveAll();
+
+ // Build a map from each continuation to the "canonical"
+ // callfinally for that continuation.
+ unsigned callFinallyCount = 0;
+ BasicBlock* const beginHandlerBlock = HBtab->ebdHndBeg;
+
+ for (BasicBlock* currentBlock = firstCallFinallyRangeBlock; currentBlock != endCallFinallyRangeBlock;
+ currentBlock = currentBlock->bbNext)
+ {
+ // Ignore "retless" callfinallys (where the finally doesn't return).
+ if (currentBlock->isBBCallAlwaysPair() && (currentBlock->bbJumpDest == beginHandlerBlock))
+ {
+ // The callfinally must be empty, so that we can
+ // safely retarget anything that branches here to
+ // another callfinally with the same continuation.
+ assert(currentBlock->isEmpty());
+
+ // This callfinally invokes the finally for this try.
+ callFinallyCount++;
+
+ // Locate the continuation
+ BasicBlock* const leaveBlock = currentBlock->bbNext;
+ BasicBlock* const continuationBlock = leaveBlock->bbJumpDest;
+
+ // If this is the first time we've seen this
+ // continuation, register this callfinally as the
+ // canonical one.
+ if (!continuationMap.Lookup(continuationBlock))
+ {
+ continuationMap.Set(continuationBlock, currentBlock);
+ }
+ }
+ }
+
+ // Now we've seen all the callfinallys and their continuations.
+ JITDUMP("EH#%i has %u callfinallys, %u continuations\n", XTnum, callFinallyCount, continuationMap.GetCount());
+
+ // If there are more callfinallys than continuations, some of the
+ // callfinallys must share a continuation, and we can merge them.
+ const bool tryMerge = callFinallyCount > continuationMap.GetCount();
+
+ if (!tryMerge)
+ {
+ JITDUMP("EH#%i does not have any mergeable callfinallys\n", XTnum);
+ continue;
+ }
+
+ canMerge = true;
+
+ // Walk the callfinally region, looking for blocks that jump
+ // to a callfinally that invokes this try's finally, and make
+ // sure they all jump to the appropriate canonical
+ // callfinally.
+ for (BasicBlock* currentBlock = firstCallFinallyRangeBlock; currentBlock != endCallFinallyRangeBlock;
+ currentBlock = currentBlock->bbNext)
+ {
+ bool merged = fgRetargetBranchesToCanonicalCallFinally(currentBlock, beginHandlerBlock, continuationMap);
+ didMerge = didMerge || merged;
+ }
+ }
+
+ if (!canMerge)
+ {
+ JITDUMP("Method had try-finallys, but did not have any mergeable finally chains.\n");
+ }
+ else
+ {
+ if (didMerge)
+ {
+ JITDUMP("Method had mergeable try-finallys and some callfinally merges were performed.\n");
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgMergeFinallyChains()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+#endif // DEBUG
+ }
+ else
+ {
+ // We may not end up doing any merges, because we are only
+ // merging continuations for callfinallys that can
+ // actually be invoked, and the importer may leave
+ // unreachable callfinallys around (for instance, if it
+ // is forced to re-import a leave).
+ JITDUMP("Method had mergeable try-finallys but no callfinally merges were performed,\n"
+ "likely the non-canonical callfinallys were unreachable\n");
+ }
+ }
+}
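As a toy model of the merge (the types here are hypothetical, not the JIT's BasicBlock/BlockToBlockMap): each continuation keeps the first callfinally seen as its canonical representative, and branches to the other callfinallys sharing that continuation can then be retargeted to it.

    #include <map>
    #include <vector>

    struct CallFinally
    {
        int id;           // block id of the callfinally
        int continuation; // block id the paired ALWAYS returns to
    };

    // The first callfinally seen for each continuation becomes canonical; any
    // branch to a non-canonical callfinally with the same continuation can be
    // redirected to the canonical one.
    std::map<int, int> BuildCanonicalMap(const std::vector<CallFinally>& callFinallys)
    {
        std::map<int, int> canonical; // continuation id -> canonical callfinally id
        for (const CallFinally& cf : callFinallys)
        {
            canonical.emplace(cf.continuation, cf.id); // emplace keeps the first entry
        }
        return canonical;
    }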
+
+//------------------------------------------------------------------------
+// fgRetargetBranchesToCanonicalCallFinally: find non-canonical callfinally
+// invocations and make them canonical.
+//
+// Arguments:
+// block -- block to examine for call finally invocation
+// handler -- start of the finally region for the try
+// continuationMap -- map giving the canonical callfinally for
+// each continuation
+//
+// Returns:
+// true iff the block's branch was retargeted.
+
+bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block,
+ BasicBlock* handler,
+ BlockToBlockMap& continuationMap)
+{
+ // We expect callfinallys to be invoked by a BBJ_ALWAYS at this
+ // stage in compilation.
+ if (block->bbJumpKind != BBJ_ALWAYS)
+ {
+ // Possible paranoia assert here -- no flow successor of
+ // this block should be a callfinally for this try.
+ return false;
+ }
+
+ // Screen out cases that are not callfinallys to the right
+ // handler.
+ BasicBlock* const callFinally = block->bbJumpDest;
+
+ if (!callFinally->isBBCallAlwaysPair())
+ {
+ return false;
+ }
+
+ if (callFinally->bbJumpDest != handler)
+ {
+ return false;
+ }
+
+ // Ok, this is a callfinally that invokes the right handler.
+ // Get its continuation.
+ BasicBlock* const leaveBlock = callFinally->bbNext;
+ BasicBlock* const continuationBlock = leaveBlock->bbJumpDest;
+
+ // Find the canonical callfinally for that continuation.
+ BasicBlock* const canonicalCallFinally = continuationMap[continuationBlock];
+ assert(canonicalCallFinally != nullptr);
+
+ // If the block already jumps to the canonical call finally, no work needed.
+ if (block->bbJumpDest == canonicalCallFinally)
+ {
+ JITDUMP("BB%02u already canonical\n", block->bbNum);
+ return false;
+ }
+
+ // Else, retarget it so that it does...
+ JITDUMP("Redirecting branch in BB%02u from BB%02u to BB%02u.\n", block->bbNum, callFinally->bbNum,
+ canonicalCallFinally->bbNum);
+
+ block->bbJumpDest = canonicalCallFinally;
+ fgAddRefPred(canonicalCallFinally, block);
+ assert(callFinally->bbRefs > 0);
+ fgRemoveRefPred(callFinally, block);
+
+ return true;
+}
+
// FatCalliTransformer transforms calli that can use fat function pointer.
// Fat function pointer is pointer with the second least significant bit set,
// if the bit is set, the pointer (after clearing the bit) actually points to
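To illustrate the fat-pointer convention the comment above describes, a small hedged sketch follows; the bit position and tuple contents are as described here, but the names and types are made up.

    #include <cstdint>

    // Tuple the cleared pointer refers to: the real method entry point plus
    // the hidden instantiation (generic context) argument.
    struct FatPointerTuple
    {
        void* methodPointer;
        void* instantiationArgument;
    };

    constexpr uintptr_t FatPointerBit = 0x2; // second least significant bit

    bool IsFatPointer(uintptr_t fptr)
    {
        return (fptr & FatPointerBit) != 0;
    }

    FatPointerTuple* DecodeFatPointer(uintptr_t fptr)
    {
        return reinterpret_cast<FatPointerTuple*>(fptr & ~FatPointerBit);
    }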
@@ -24132,7 +24882,7 @@ private:
// fixedFptrAddress - pointer to the tuple <methodPointer, instantiationArgumentPointer>
//
// Return Value:
- // loaded hidden argument.
+ // generic context hidden argument.
GenTreePtr GetHiddenArgument(GenTreePtr fixedFptrAddress)
{
GenTreePtr fixedFptrAddressCopy = compiler->gtCloneExpr(fixedFptrAddress);
@@ -24148,7 +24898,7 @@ private:
//
// Arguments:
// actualCallAddress - fixed call address
- // hiddenArgument - loaded hidden argument
+ // hiddenArgument - generic context hidden argument
//
// Return Value:
// created call node.
@@ -24158,13 +24908,58 @@ private:
GenTreePtr fatTree = fatStmt->gtStmtExpr;
GenTreeCall* fatCall = GetCall(fatStmt);
fatCall->gtCallAddr = actualCallAddress;
- GenTreeArgList* args = fatCall->gtCallArgs;
- args = compiler->gtNewListNode(hiddenArgument, args);
- fatCall->gtCallArgs = args;
+ AddHiddenArgument(fatCall, hiddenArgument);
return fatStmt;
}
//------------------------------------------------------------------------
+ // AddHiddenArgument: add hidden argument to the call argument list.
+ //
+ // Arguments:
+ // fatCall - fat call node
+ // hiddenArgument - generic context hidden argument
+ //
+ void AddHiddenArgument(GenTreeCall* fatCall, GenTreePtr hiddenArgument)
+ {
+ GenTreeArgList* oldArgs = fatCall->gtCallArgs;
+ GenTreeArgList* newArgs;
+#if USER_ARGS_COME_LAST
+ if (fatCall->HasRetBufArg())
+ {
+ GenTreePtr retBuffer = oldArgs->Current();
+ GenTreeArgList* rest = oldArgs->Rest();
+ newArgs = compiler->gtNewListNode(hiddenArgument, rest);
+ newArgs = compiler->gtNewListNode(retBuffer, newArgs);
+ }
+ else
+ {
+ newArgs = compiler->gtNewListNode(hiddenArgument, oldArgs);
+ }
+#else
+ newArgs = oldArgs;
+ AddArgumentToTail(newArgs, hiddenArgument);
+#endif
+ fatCall->gtCallArgs = newArgs;
+ }
+
+ //------------------------------------------------------------------------
+ // AddArgumentToTail: add hidden argument to the tail of the call argument list.
+ //
+ // Arguments:
+ // argList - fat call node
+ // hiddenArgument - generic context hidden argument
+ //
+ void AddArgumentToTail(GenTreeArgList* argList, GenTreePtr hiddenArgument)
+ {
+ GenTreeArgList* iterator = argList;
+ while (iterator->Rest() != nullptr)
+ {
+ iterator = iterator->Rest();
+ }
+ iterator->Rest() = compiler->gtNewArgList(hiddenArgument);
+ }
+
+ //------------------------------------------------------------------------
// RemoveOldStatement: remove original stmt from current block.
//
void RemoveOldStatement()
@@ -24256,3 +25051,63 @@ void Compiler::fgTransformFatCalli()
CheckNoFatPointerCandidatesLeft();
#endif
}
+
+//------------------------------------------------------------------------
+// fgMeasureIR: count and return the number of IR nodes in the function.
+//
+unsigned Compiler::fgMeasureIR()
+{
+ unsigned nodeCount = 0;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (!block->IsLIR())
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ fgWalkTreePre(&stmt->gtStmtExpr,
+ [](GenTree** slot, fgWalkData* data) -> Compiler::fgWalkResult {
+ (*reinterpret_cast<unsigned*>(data->pCallbackData))++;
+ return Compiler::WALK_CONTINUE;
+ },
+ &nodeCount);
+ }
+ }
+ else
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ nodeCount++;
+ }
+ }
+ }
+
+ return nodeCount;
+}
+
+//------------------------------------------------------------------------
+// fgCompDominatedByExceptionalEntryBlocks: compute blocks that are
+// dominated by a non-normal (i.e. exceptional) entry.
+//
+void Compiler::fgCompDominatedByExceptionalEntryBlocks()
+{
+ assert(fgEnterBlksSetValid);
+ if (BlockSetOps::Count(this, fgEnterBlks) != 1) // There are exception entries.
+ {
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ BasicBlock* block = fgBBInvPostOrder[i];
+ if (BlockSetOps::IsMember(this, fgEnterBlks, block->bbNum))
+ {
+ if (fgFirstBB != block) // skip the normal entry.
+ {
+ block->SetDominatedByExceptionalEntryFlag();
+ }
+ }
+ else if (block->bbIDom->IsDominatedByExceptionalEntryFlag())
+ {
+ block->SetDominatedByExceptionalEntryFlag();
+ }
+ }
+ }
+}
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index dcca19ebe8..4c300ac15f 100644
--- a/src/jit/gcencode.cpp
+++ b/src/jit/gcencode.cpp
@@ -1318,6 +1318,8 @@ size_t GCInfo::gcInfoBlockHdrSave(
header->syncStartOffset = INVALID_SYNC_OFFSET;
header->syncEndOffset = INVALID_SYNC_OFFSET;
+#ifndef UNIX_X86_ABI
+ // The JIT is responsible for synchronization under the funclet-based EH model that x86/Linux uses.
if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
{
assert(compiler->syncStartEmitCookie != NULL);
@@ -1332,6 +1334,7 @@ size_t GCInfo::gcInfoBlockHdrSave(
// synchronized methods can't have more than 1 epilog
assert(header->epilogCount <= 1);
}
+#endif
header->revPInvokeOffset = INVALID_REV_PINVOKE_OFFSET;
@@ -2424,7 +2427,9 @@ DONE_VLT:
assert((codeDelta & 0x7) == codeDelta);
*dest++ = 0xB0 | (BYTE)codeDelta;
+#ifndef UNIX_X86_ABI
assert(!compiler->isFramePointerUsed());
+#endif
/* Remember the new 'last' offset */
@@ -3844,13 +3849,15 @@ struct InterruptibleRangeReporter
}
};
-void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
- unsigned codeSize,
- unsigned prologSize,
- MakeRegPtrMode mode)
+void GCInfo::gcMakeRegPtrTable(
+ GcInfoEncoder* gcInfoEncoder, unsigned codeSize, unsigned prologSize, MakeRegPtrMode mode, unsigned* callCntRef)
{
GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+ const bool noTrackedGCSlots =
+ (compiler->opts.MinOpts() && !compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
+ !JitConfig.JitMinOptsTrackGCrefs());
+
if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
{
m_regSlotMap = new (compiler->getAllocator()) RegSlotMap(compiler->getAllocator());
@@ -3961,14 +3968,25 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
{
stackSlotBase = GC_FRAMEREG_REL;
}
- StackSlotIdKey sskey(varDsc->lvStkOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
- GcSlotId varSlotId;
- if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ if (noTrackedGCSlots)
{
- if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ // No need to hash/lookup untracked GC refs; just grab a new Slot Id.
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
{
- varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varDsc->lvStkOffs, flags, stackSlotBase);
- m_stackSlotMap->Set(sskey, varSlotId);
+ gcInfoEncoderWithLog->GetStackSlotId(varDsc->lvStkOffs, flags, stackSlotBase);
+ }
+ }
+ else
+ {
+ StackSlotIdKey sskey(varDsc->lvStkOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varDsc->lvStkOffs, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
}
}
}
@@ -4204,9 +4222,24 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
{
if (gcCallDescList != nullptr)
{
- for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ if (noTrackedGCSlots)
{
- numCallSites++;
+ // We have the call count from the previous run.
+ numCallSites = *callCntRef;
+
+ // If there are no calls, tell the world and bail.
+ if (numCallSites == 0)
+ {
+ gcInfoEncoderWithLog->DefineCallSites(nullptr, nullptr, 0);
+ return;
+ }
+ }
+ else
+ {
+ for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ {
+ numCallSites++;
+ }
}
pCallSites = new (compiler, CMK_GC) unsigned[numCallSites];
pCallSiteSizes = new (compiler, CMK_GC) BYTE[numCallSites];
@@ -4216,17 +4249,8 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// Now consider every call.
for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
{
- if (mode == MAKE_REG_PTR_MODE_DO_WORK)
- {
- pCallSites[callSiteNum] = call->cdOffs - call->cdCallInstrSize;
- pCallSiteSizes[callSiteNum] = call->cdCallInstrSize;
- callSiteNum++;
- }
-
- unsigned nextOffset;
-
// Figure out the code offset of this entry.
- nextOffset = call->cdOffs;
+ unsigned nextOffset = call->cdOffs;
// As far as I (DLD, 2010) can determine by asking around, the "call->u1.cdArgMask"
// and "cdArgCnt" cases are to handle x86 situations in which a call expression is nested as an
@@ -4251,13 +4275,35 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
assert(call->cdOffs >= call->cdCallInstrSize);
// call->cdOffs is actually the offset of the instruction *following* the call, so subtract
// the call instruction size to get the offset of the actual call instruction...
- unsigned callOffset = call->cdOffs - call->cdCallInstrSize;
- // Record that these registers are live before the call...
- gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask, nullptr);
- // ...and dead after.
- gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, call->cdOffs, regMask, GC_SLOT_DEAD, byrefRegMask,
- nullptr);
+ unsigned callOffset = nextOffset - call->cdCallInstrSize;
+
+ if (noTrackedGCSlots && regMask == 0)
+ {
+ // No live GC refs in regs at the call -> don't record the call.
+ }
+ else
+ {
+ // Append an entry for the call if doing the real thing.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ pCallSites[callSiteNum] = callOffset;
+ pCallSiteSizes[callSiteNum] = call->cdCallInstrSize;
+ }
+ callSiteNum++;
+
+ // Record that these registers are live before the call...
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask,
+ nullptr);
+ // ...and dead after.
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, nextOffset, regMask, GC_SLOT_DEAD, byrefRegMask,
+ nullptr);
+ }
}
+ // Make sure we've recorded the expected number of calls
+ assert(mode != MAKE_REG_PTR_MODE_DO_WORK || numCallSites == callSiteNum);
+ // Return the actual recorded call count to the caller
+ *callCntRef = callSiteNum;
+
// OK, define the call sites.
if (mode == MAKE_REG_PTR_MODE_DO_WORK)
{
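
A note on the gcMakeRegPtrTable change above: on the MinOpts noTrackedGCSlots path, the call-site count is no longer recomputed by walking gcCallDescList; the count produced by the first (slot-assignment) pass is fed back through the new callCntRef parameter and reused by the do-work pass, which must record exactly that many entries. The sketch below is a standalone illustration of that two-pass pattern under assumed names (CallSite, Mode, RecordCallSites); it is not the CoreCLR implementation.

    #include <cassert>
    #include <vector>

    struct CallSite
    {
        unsigned offset;
        unsigned liveRegMask; // zero when no GC refs are live in registers at the call
    };

    enum class Mode { CountOnly, DoWork };

    // First call with Mode::CountOnly to establish *callCountRef, then with Mode::DoWork to
    // fill 'offsetsOut'; the assert checks that the two passes agree, as in gcMakeRegPtrTable.
    void RecordCallSites(const std::vector<CallSite>& calls, Mode mode, unsigned* callCountRef,
                         std::vector<unsigned>* offsetsOut)
    {
        if (mode == Mode::DoWork)
        {
            offsetsOut->reserve(*callCountRef); // size is known from the counting pass
        }

        unsigned recorded = 0;
        for (const CallSite& call : calls)
        {
            if (call.liveRegMask == 0)
            {
                continue; // nothing live in registers -> the call site is not reported
            }
            if (mode == Mode::DoWork)
            {
                offsetsOut->push_back(call.offset);
            }
            recorded++;
        }

        assert((mode != Mode::DoWork) || (recorded == *callCountRef));
        *callCountRef = recorded;
    }
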
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 7af500f877..c5733b81e4 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -2531,7 +2531,7 @@ AGAIN:
}
}
- if (tree->gtCall.gtCallLateArgs)
+ if (tree->gtCall.gtControlExpr)
{
if (gtHasRef(tree->gtCall.gtControlExpr, lclNum, defOnly))
{
@@ -5524,7 +5524,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
}
#ifdef FEATURE_READYTORUN_COMPILER
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARMARCH_)
if (tree->gtCall.IsR2RRelativeIndir())
{
ftreg |= RBM_R2R_INDIRECT_PARAM;
@@ -6927,7 +6927,7 @@ GenTreeCall* Compiler::gtNewCallNode(
// Initialize spill flags of gtOtherRegs
node->ClearOtherRegFlags();
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
// Initialize the multi-reg long return info if necessary
if (varTypeIsLong(node))
{
@@ -6941,7 +6941,7 @@ GenTreeCall* Compiler::gtNewCallNode(
// must be a long returned in two registers
assert(retTypeDesc->GetReturnRegCount() == 2);
}
-#endif // defined(_TARGET_X86_) && !defined(_LEGACY_BACKEND_)
+#endif // (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(_LEGACY_BACKEND_)
return node;
}
@@ -7062,10 +7062,9 @@ GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2, GenTree
* that has the matching argNum and return the fgArgTableEntryPtr
*/
-fgArgTabEntryPtr Compiler::gtArgEntryByArgNum(GenTreePtr call, unsigned argNum)
+fgArgTabEntryPtr Compiler::gtArgEntryByArgNum(GenTreeCall* call, unsigned argNum)
{
- noway_assert(call->IsCall());
- fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ fgArgInfoPtr argInfo = call->fgArgInfo;
noway_assert(argInfo != nullptr);
unsigned argCount = argInfo->ArgCount();
@@ -7090,10 +7089,9 @@ fgArgTabEntryPtr Compiler::gtArgEntryByArgNum(GenTreePtr call, unsigned argNum)
* that has the matching node and return the fgArgTableEntryPtr
*/
-fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
+fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreeCall* call, GenTreePtr node)
{
- noway_assert(call->IsCall());
- fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ fgArgInfoPtr argInfo = call->fgArgInfo;
noway_assert(argInfo != nullptr);
unsigned argCount = argInfo->ArgCount();
@@ -7108,12 +7106,6 @@ fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
{
return curArgTabEntry;
}
-#ifdef PROTO_JIT
- else if (node->OperGet() == GT_RELOAD && node->gtOp.gtOp1 == curArgTabEntry->node)
- {
- return curArgTabEntry;
- }
-#endif // PROTO_JIT
else if (curArgTabEntry->parent != nullptr)
{
assert(curArgTabEntry->parent->OperIsList());
@@ -7124,7 +7116,7 @@ fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
}
else // (curArgTabEntry->parent == NULL)
{
- if (call->gtCall.gtCallObjp == node)
+ if (call->gtCallObjp == node)
{
return curArgTabEntry;
}
@@ -7139,10 +7131,9 @@ fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
* Find and return the entry with the given "lateArgInx". Requires that one is found
* (asserts this).
*/
-fgArgTabEntryPtr Compiler::gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx)
+fgArgTabEntryPtr Compiler::gtArgEntryByLateArgIndex(GenTreeCall* call, unsigned lateArgInx)
{
- noway_assert(call->IsCall());
- fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ fgArgInfoPtr argInfo = call->fgArgInfo;
noway_assert(argInfo != nullptr);
unsigned argCount = argInfo->ArgCount();
@@ -7909,8 +7900,7 @@ GenTreePtr Compiler::gtCloneExpr(
copy = new (this, oper) GenTreeFptrVal(tree->gtType, tree->gtFptrVal.gtFptrMethod);
#ifdef FEATURE_READYTORUN_COMPILER
- copy->gtFptrVal.gtEntryPoint = tree->gtFptrVal.gtEntryPoint;
- copy->gtFptrVal.gtLdftnResolvedToken = tree->gtFptrVal.gtLdftnResolvedToken;
+ copy->gtFptrVal.gtEntryPoint = tree->gtFptrVal.gtEntryPoint;
#endif
goto DONE;
@@ -8265,7 +8255,7 @@ GenTreePtr Compiler::gtCloneExpr(
if (tree->gtCall.fgArgInfo)
{
// Create and initialize the fgArgInfo for our copy of the call tree
- copy->gtCall.fgArgInfo = new (this, CMK_Unknown) fgArgInfo(copy, tree);
+ copy->gtCall.fgArgInfo = new (this, CMK_Unknown) fgArgInfo(copy->AsCall(), tree->AsCall());
}
else
{
@@ -8636,21 +8626,19 @@ bool Compiler::gtCompareTree(GenTree* op1, GenTree* op2)
return false;
}
-GenTreePtr Compiler::gtGetThisArg(GenTreePtr call)
+GenTreePtr Compiler::gtGetThisArg(GenTreeCall* call)
{
- assert(call->gtOper == GT_CALL);
-
- if (call->gtCall.gtCallObjp != nullptr)
+ if (call->gtCallObjp != nullptr)
{
- if (call->gtCall.gtCallObjp->gtOper != GT_NOP && call->gtCall.gtCallObjp->gtOper != GT_ASG)
+ if (call->gtCallObjp->gtOper != GT_NOP && call->gtCallObjp->gtOper != GT_ASG)
{
- if (!(call->gtCall.gtCallObjp->gtFlags & GTF_LATE_ARG))
+ if (!(call->gtCallObjp->gtFlags & GTF_LATE_ARG))
{
- return call->gtCall.gtCallObjp;
+ return call->gtCallObjp;
}
}
- if (call->gtCall.gtCallLateArgs)
+ if (call->gtCallLateArgs)
{
regNumber thisReg = REG_ARG_0;
unsigned argNum = 0;
@@ -8658,13 +8646,13 @@ GenTreePtr Compiler::gtGetThisArg(GenTreePtr call)
GenTreePtr result = thisArgTabEntry->node;
#if !FEATURE_FIXED_OUT_ARGS
- GenTreePtr lateArgs = call->gtCall.gtCallLateArgs;
- regList list = call->gtCall.regArgList;
+ GenTreePtr lateArgs = call->gtCallLateArgs;
+ regList list = call->regArgList;
int index = 0;
while (lateArgs != NULL)
{
assert(lateArgs->gtOper == GT_LIST);
- assert(index < call->gtCall.regArgListCount);
+ assert(index < call->regArgListCount);
regNumber curArgReg = list[index];
if (curArgReg == thisReg)
{
@@ -8811,8 +8799,8 @@ void GenTree::CopyTo(class Compiler* comp, const GenTree& gt)
{
SetOperRaw(gt.OperGet());
- gtType = gt.gtType;
- gtAssertionNum = gt.gtAssertionNum;
+ gtType = gt.gtType;
+ gtAssertionInfo = gt.gtAssertionInfo;
gtRegNum = gt.gtRegNum; // one union member.
CopyCosts(&gt);
@@ -9183,529 +9171,553 @@ GenTreePtr GenTree::GetChild(unsigned childNum)
}
}
-GenTreeUseEdgeIterator::GenTreeUseEdgeIterator() : m_node(nullptr), m_edge(nullptr), m_argList(nullptr), m_state(-1)
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator()
+ : m_advance(nullptr), m_node(nullptr), m_edge(nullptr), m_argList(nullptr), m_state(-1)
{
}
GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
- : m_node(node), m_edge(nullptr), m_argList(nullptr), m_state(0)
+ : m_advance(nullptr), m_node(node), m_edge(nullptr), m_argList(nullptr), m_state(0)
{
assert(m_node != nullptr);
- // Advance to the first operand.
- ++(*this);
-}
+ // NOTE: the switch statement below must be updated when introducing new nodes.
-//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::GetNextUseEdge:
-// Gets the next operand of a node with a fixed number of operands.
-// This covers all nodes besides GT_CALL, GT_PHI, and GT_SIMD. For the
-// node types handled by this method, the `m_state` field indicates the
-// index of the next operand to produce.
-//
-// Returns:
-// The node's next operand or nullptr if all operands have been
-// produced.
-//
-GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
-{
switch (m_node->OperGet())
{
- case GT_CMPXCHG:
- switch (m_state)
- {
- case 0:
- return &m_node->AsCmpXchg()->gtOpLocation;
- case 1:
- return &m_node->AsCmpXchg()->gtOpValue;
- case 2:
- return &m_node->AsCmpXchg()->gtOpComparand;
- default:
- return nullptr;
+ // Leaf nodes
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_CATCH_ARG:
+ case GT_LABEL:
+ case GT_FTN_ADDR:
+ case GT_RET_EXPR:
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ case GT_MEMORYBARRIER:
+ case GT_JMP:
+ case GT_JCC:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif // !FEATURE_EH_FUNCLETS
+ case GT_PHI_ARG:
+#ifndef LEGACY_BACKEND
+ case GT_JMPTABLE:
+#endif // LEGACY_BACKEND
+ case GT_REG_VAR:
+ case GT_CLS_VAR:
+ case GT_CLS_VAR_ADDR:
+ case GT_ARGPLACE:
+ case GT_PHYSREG:
+ case GT_EMITNOP:
+ case GT_PINVOKE_PROLOG:
+ case GT_PINVOKE_EPILOG:
+ case GT_IL_OFFSET:
+ m_state = -1;
+ return;
+
+ // Standard unary operators
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ case GT_NOT:
+ case GT_NEG:
+ case GT_COPY:
+ case GT_RELOAD:
+ case GT_ARR_LENGTH:
+ case GT_CAST:
+ case GT_CKFINITE:
+ case GT_LCLHEAP:
+ case GT_ADDR:
+ case GT_IND:
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_BOX:
+ case GT_ALLOCOBJ:
+ case GT_INIT_VAL:
+ case GT_JTRUE:
+ case GT_SWITCH:
+ case GT_NULLCHECK:
+ case GT_PHYSREGDST:
+ case GT_PUTARG_REG:
+ case GT_PUTARG_STK:
+ case GT_RETURNTRAP:
+ m_edge = &m_node->AsUnOp()->gtOp1;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ return;
+
+ // Unary operators with an optional operand
+ case GT_NOP:
+ case GT_RETURN:
+ case GT_RETFILT:
+ if (m_node->AsUnOp()->gtOp1 == nullptr)
+ {
+ assert(m_node->NullOp1Legal());
+ m_state = -1;
}
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- switch (m_state)
+ else
{
- case 0:
- return &m_node->AsBoundsChk()->gtIndex;
- case 1:
- return &m_node->AsBoundsChk()->gtArrLen;
- default:
- return nullptr;
+ m_edge = &m_node->AsUnOp()->gtOp1;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
}
+ return;
- case GT_FIELD:
- if (m_state == 0)
+ // Variadic nodes
+ case GT_PHI:
+ SetEntryStateForList(m_node->AsUnOp()->gtOp1);
+ return;
+
+ case GT_FIELD_LIST:
+ SetEntryStateForList(m_node);
+ return;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ if (m_node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInitN)
{
- return &m_node->AsField()->gtFldObj;
+ SetEntryStateForList(m_node->AsSIMD()->gtOp1);
}
- return nullptr;
-
- case GT_STMT:
- if (m_state == 0)
+ else
{
- return &m_node->AsStmt()->gtStmtExpr;
+ SetEntryStateForBinOp();
}
- return nullptr;
+ return;
+#endif // FEATURE_SIMD
- case GT_ARR_ELEM:
- if (m_state == 0)
+ // LEA, which may have no first operand
+ case GT_LEA:
+ if (m_node->AsAddrMode()->gtOp1 == nullptr)
{
- return &m_node->AsArrElem()->gtArrObj;
+ m_edge = &m_node->AsAddrMode()->gtOp2;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
}
- else if (m_state <= m_node->AsArrElem()->gtArrRank)
+ else
{
- return &m_node->AsArrElem()->gtArrInds[m_state - 1];
+ SetEntryStateForBinOp();
}
- return nullptr;
+ return;
- case GT_ARR_OFFSET:
- switch (m_state)
- {
- case 0:
- return &m_node->AsArrOffs()->gtOffset;
- case 1:
- return &m_node->AsArrOffs()->gtIndex;
- case 2:
- return &m_node->AsArrOffs()->gtArrObj;
- default:
- return nullptr;
- }
+ // Special nodes
+ case GT_CMPXCHG:
+ m_edge = &m_node->AsCmpXchg()->gtOpLocation;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCmpXchg;
+ return;
- // Call, phi, and SIMD nodes are handled by MoveNext{Call,Phi,SIMD}UseEdge, repsectively.
- case GT_CALL:
- case GT_PHI:
+ case GT_ARR_BOUNDS_CHECK:
#ifdef FEATURE_SIMD
- case GT_SIMD:
-#endif
- break;
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ m_edge = &m_node->AsBoundsChk()->gtIndex;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::AdvanceBoundsChk;
+ return;
- case GT_ASG:
- {
- bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
- switch (m_state)
+ case GT_FIELD:
+ if (m_node->AsField()->gtFldObj == nullptr)
{
- case 0:
- return !operandsReversed ? &(m_node->AsOp()->gtOp1) : &(m_node->AsOp()->gtOp2);
- case 1:
- return !operandsReversed ? &(m_node->AsOp()->gtOp2) : &(m_node->AsOp()->gtOp1);
- default:
- return nullptr;
+ m_state = -1;
}
- }
+ else
+ {
+ m_edge = &m_node->AsField()->gtFldObj;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ }
+ return;
+
+ case GT_STMT:
+ if (m_node->AsStmt()->gtStmtExpr == nullptr)
+ {
+ m_state = -1;
+ }
+ else
+ {
+ m_edge = &m_node->AsStmt()->gtStmtExpr;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ }
+ return;
+
+ case GT_ARR_ELEM:
+ m_edge = &m_node->AsArrElem()->gtArrObj;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::AdvanceArrElem;
+ return;
+
+ case GT_ARR_OFFSET:
+ m_edge = &m_node->AsArrOffs()->gtOffset;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::AdvanceArrOffset;
+ return;
case GT_DYN_BLK:
{
GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
- switch (m_state)
- {
- case 0:
- return dynBlock->gtEvalSizeFirst ? &dynBlock->gtDynamicSize : &dynBlock->gtOp1;
- case 1:
- return dynBlock->gtEvalSizeFirst ? &dynBlock->gtOp1 : &dynBlock->gtDynamicSize;
- default:
- return nullptr;
- }
+ m_edge = dynBlock->gtEvalSizeFirst ? &dynBlock->gtDynamicSize : &dynBlock->gtOp1;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::AdvanceDynBlk;
}
- break;
+ return;
case GT_STORE_DYN_BLK:
{
GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
if (dynBlock->gtEvalSizeFirst)
{
- switch (m_state)
- {
- case 0:
- return &dynBlock->gtDynamicSize;
- case 1:
- return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
- case 2:
- return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
- default:
- return nullptr;
- }
+ m_edge = &dynBlock->gtDynamicSize;
}
else
{
- switch (m_state)
- {
- case 0:
- return dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
- case 1:
- return dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
- case 2:
- return &dynBlock->gtDynamicSize;
- default:
- return nullptr;
- }
- }
- }
- break;
-
- case GT_LEA:
- {
- GenTreeAddrMode* lea = m_node->AsAddrMode();
-
- bool hasOp1 = lea->gtOp1 != nullptr;
- if (!hasOp1)
- {
- return m_state == 0 ? &lea->gtOp2 : nullptr;
+ m_edge = dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
}
+ assert(*m_edge != nullptr);
- bool operandsReversed = (lea->gtFlags & GTF_REVERSE_OPS) != 0;
- switch (m_state)
- {
- case 0:
- return !operandsReversed ? &lea->gtOp1 : &lea->gtOp2;
- case 1:
- return !operandsReversed ? &lea->gtOp2 : &lea->gtOp1;
- default:
- return nullptr;
- }
+ m_advance = &GenTreeUseEdgeIterator::AdvanceStoreDynBlk;
}
- break;
+ return;
- case GT_FIELD_LIST:
- // Field List nodes are handled by MoveToNextFieldUseEdge.
- break;
+ case GT_CALL:
+ AdvanceCall<CALL_INSTANCE>();
+ return;
+ // Binary nodes
default:
- if (m_node->OperIsConst() || m_node->OperIsLeaf())
- {
- return nullptr;
- }
- else if (m_node->OperIsUnary())
- {
- return m_state == 0 ? &m_node->AsUnOp()->gtOp1 : nullptr;
- }
- else if (m_node->OperIsBinary())
- {
- bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
- switch (m_state)
- {
- case 0:
- return !operandsReversed ? &m_node->AsOp()->gtOp1 : &m_node->AsOp()->gtOp2;
- case 1:
- return !operandsReversed ? &m_node->AsOp()->gtOp2 : &m_node->AsOp()->gtOp1;
- default:
- return nullptr;
- }
- }
+ assert(m_node->OperIsBinary());
+ SetEntryStateForBinOp();
+ return;
}
-
- unreached();
}
//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::MoveToNextCallUseEdge:
-// Moves to the next operand of a call node. Unlike the simple nodes
-// handled by `GetNextUseEdge`, call nodes have a variable number of
-// operands stored in cons lists. This method expands the cons lists
-// into the operands stored within.
+// GenTreeUseEdgeIterator::AdvanceCmpXchg: produces the next operand of a CmpXchg node and advances the state.
//
-void GenTreeUseEdgeIterator::MoveToNextCallUseEdge()
+void GenTreeUseEdgeIterator::AdvanceCmpXchg()
{
- enum
- {
- CALL_INSTANCE = 0,
- CALL_ARGS = 1,
- CALL_LATE_ARGS = 2,
- CALL_CONTROL_EXPR = 3,
- CALL_COOKIE = 4,
- CALL_ADDRESS = 5,
- CALL_TERMINAL = 6,
- };
-
- GenTreeCall* call = m_node->AsCall();
-
- for (;;)
+ switch (m_state)
{
- switch (m_state)
- {
- case CALL_INSTANCE:
- m_state = CALL_ARGS;
- m_argList = call->gtCallArgs;
-
- if (call->gtCallObjp != nullptr)
- {
- m_edge = &call->gtCallObjp;
- return;
- }
- break;
-
- case CALL_ARGS:
- case CALL_LATE_ARGS:
- if (m_argList == nullptr)
- {
- m_state++;
-
- if (m_state == CALL_LATE_ARGS)
- {
- m_argList = call->gtCallLateArgs;
- }
- }
- else
- {
- GenTreeArgList* argNode = m_argList->AsArgList();
- m_edge = &argNode->gtOp1;
- m_argList = argNode->Rest();
- return;
- }
- break;
-
- case CALL_CONTROL_EXPR:
- m_state = call->gtCallType == CT_INDIRECT ? CALL_COOKIE : CALL_TERMINAL;
+ case 0:
+ m_edge = &m_node->AsCmpXchg()->gtOpValue;
+ m_state = 1;
+ break;
+ case 1:
+ m_edge = &m_node->AsCmpXchg()->gtOpComparand;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ break;
+ default:
+ unreached();
+ }
- if (call->gtControlExpr != nullptr)
- {
- m_edge = &call->gtControlExpr;
- return;
- }
- break;
+ assert(*m_edge != nullptr);
+}
- case 4:
- assert(call->gtCallType == CT_INDIRECT);
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceBoundsChk: produces the next operand of a BoundsChk node and advances the state.
+//
+void GenTreeUseEdgeIterator::AdvanceBoundsChk()
+{
+ m_edge = &m_node->AsBoundsChk()->gtArrLen;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+}
- m_state = CALL_ADDRESS;
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceArrElem: produces the next operand of an ArrElem node and advances the state.
+//
+// Because these nodes are variadic, this function uses `m_state` to index into the list of array indices.
+//
+void GenTreeUseEdgeIterator::AdvanceArrElem()
+{
+ if (m_state < m_node->AsArrElem()->gtArrRank)
+ {
+ m_edge = &m_node->AsArrElem()->gtArrInds[m_state];
+ assert(*m_edge != nullptr);
+ m_state++;
+ }
+ else
+ {
+ m_state = -1;
+ }
+}
- if (call->gtCallCookie != nullptr)
- {
- m_edge = &call->gtCallCookie;
- return;
- }
- break;
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceArrOffset: produces the next operand of an ArrOffset node and advances the state.
+//
+void GenTreeUseEdgeIterator::AdvanceArrOffset()
+{
+ switch (m_state)
+ {
+ case 0:
+ m_edge = &m_node->AsArrOffs()->gtIndex;
+ m_state = 1;
+ break;
+ case 1:
+ m_edge = &m_node->AsArrOffs()->gtArrObj;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ break;
+ default:
+ unreached();
+ }
- case 5:
- assert(call->gtCallType == CT_INDIRECT);
+ assert(*m_edge != nullptr);
+}
- m_state = CALL_TERMINAL;
- if (call->gtCallAddr != nullptr)
- {
- m_edge = &call->gtCallAddr;
- return;
- }
- break;
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceDynBlk: produces the next operand of a DynBlk node and advances the state.
+//
+void GenTreeUseEdgeIterator::AdvanceDynBlk()
+{
+ GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
- default:
- m_node = nullptr;
- m_edge = nullptr;
- m_argList = nullptr;
- m_state = -1;
- return;
- }
- }
+ m_edge = dynBlock->gtEvalSizeFirst ? &dynBlock->gtOp1 : &dynBlock->gtDynamicSize;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
}
//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::MoveToNextPhiUseEdge:
-// Moves to the next operand of a phi node. Unlike the simple nodes
-// handled by `GetNextUseEdge`, phi nodes have a variable number of
-// operands stored in a cons list. This method expands the cons list
-// into the operands stored within.
+// GenTreeUseEdgeIterator::AdvanceStoreDynBlk: produces the next operand of a StoreDynBlk node and advances the state.
//
-void GenTreeUseEdgeIterator::MoveToNextPhiUseEdge()
+// These nodes are moderately complicated but rare enough that templating this function is probably not
+// worth the extra complexity.
+//
+void GenTreeUseEdgeIterator::AdvanceStoreDynBlk()
{
- GenTreeUnOp* phi = m_node->AsUnOp();
-
- for (;;)
+ GenTreeDynBlk* const dynBlock = m_node->AsDynBlk();
+ if (dynBlock->gtEvalSizeFirst)
{
switch (m_state)
{
case 0:
- m_state = 1;
- m_argList = phi->gtOp1;
+ m_edge = dynBlock->IsReverseOp() ? &dynBlock->gtOp2 : &dynBlock->gtOp1;
+ m_state = 1;
break;
-
case 1:
- if (m_argList == nullptr)
- {
- m_state = 2;
- }
- else
- {
- GenTreeArgList* argNode = m_argList->AsArgList();
- m_edge = &argNode->gtOp1;
- m_argList = argNode->Rest();
- return;
- }
+ m_edge = dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
break;
-
default:
- m_node = nullptr;
- m_edge = nullptr;
- m_argList = nullptr;
- m_state = -1;
- return;
+ unreached();
}
}
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge:
-// Moves to the next operand of a SIMD node. Most SIMD nodes have a
-// fixed number of operands and are handled accordingly.
-// `SIMDIntrinsicInitN` nodes, however, have a variable number of
-// operands stored in a cons list. This method expands the cons list
-// into the operands stored within.
-//
-void GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge()
-{
- GenTreeSIMD* simd = m_node->AsSIMD();
-
- if (simd->gtSIMDIntrinsicID != SIMDIntrinsicInitN)
+ else
{
- bool operandsReversed = (simd->gtFlags & GTF_REVERSE_OPS) != 0;
switch (m_state)
{
case 0:
- m_edge = !operandsReversed ? &simd->gtOp1 : &simd->gtOp2;
+ m_edge = dynBlock->IsReverseOp() ? &dynBlock->gtOp1 : &dynBlock->gtOp2;
+ m_state = 1;
break;
case 1:
- m_edge = !operandsReversed ? &simd->gtOp2 : &simd->gtOp1;
+ m_edge = &dynBlock->gtDynamicSize;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
break;
default:
- m_edge = nullptr;
- break;
+ unreached();
}
+ }
- if (m_edge != nullptr && *m_edge != nullptr)
- {
- m_state++;
- }
- else
- {
- m_node = nullptr;
- m_state = -1;
- }
+ assert(*m_edge != nullptr);
+}
- return;
- }
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceBinOp: produces the next operand of a binary node and advances the state.
+//
+// This function must be instantiated s.t. `ReverseOperands` is `true` iff the node is marked with the
+// `GTF_REVERSE_OPS` flag.
+//
+template <bool ReverseOperands>
+void GenTreeUseEdgeIterator::AdvanceBinOp()
+{
+ assert(ReverseOperands == ((m_node->gtFlags & GTF_REVERSE_OPS) != 0));
- for (;;)
+ m_edge = !ReverseOperands ? &m_node->AsOp()->gtOp2 : &m_node->AsOp()->gtOp1;
+ assert(*m_edge != nullptr);
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::SetEntryStateForBinOp: produces the first operand of a binary node and chooses
+// the appropriate advance function.
+//
+void GenTreeUseEdgeIterator::SetEntryStateForBinOp()
+{
+ assert(m_node != nullptr);
+ assert(m_node->OperIsBinary());
+
+ GenTreeOp* const node = m_node->AsOp();
+
+ if (node->gtOp2 == nullptr)
{
- switch (m_state)
- {
- case 0:
- m_state = 1;
- m_argList = simd->gtOp1;
- break;
+ assert(node->gtOp1 != nullptr);
+ assert(node->NullOp2Legal());
+ m_edge = &node->gtOp1;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ }
+ else if ((node->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ m_edge = &m_node->AsOp()->gtOp2;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceBinOp<true>;
+ }
+ else
+ {
+ m_edge = &m_node->AsOp()->gtOp1;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceBinOp<false>;
+ }
+}
- case 1:
- if (m_argList == nullptr)
- {
- m_state = 2;
- }
- else
- {
- GenTreeArgList* argNode = m_argList->AsArgList();
- m_edge = &argNode->gtOp1;
- m_argList = argNode->Rest();
- return;
- }
- break;
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceList: produces the next operand of a variadic node and advances the state.
+//
+// This function does not use `m_state` for anything meaningful; it simply walks `m_argList` until
+// there are no further entries.
+//
+void GenTreeUseEdgeIterator::AdvanceList()
+{
+ assert(m_state == 0);
- default:
- m_node = nullptr;
- m_edge = nullptr;
- m_argList = nullptr;
- m_state = -1;
- return;
- }
+ if (m_argList == nullptr)
+ {
+ m_state = -1;
+ }
+ else
+ {
+ GenTreeArgList* listNode = m_argList->AsArgList();
+ m_edge = &listNode->gtOp1;
+ m_argList = listNode->Rest();
}
}
-#endif // FEATURE_SIMD
-void GenTreeUseEdgeIterator::MoveToNextFieldUseEdge()
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::SetEntryStateForList: produces the first operand of a list node.
+//
+void GenTreeUseEdgeIterator::SetEntryStateForList(GenTree* list)
{
- assert(m_node->OperGet() == GT_FIELD_LIST);
+ m_argList = list;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceList;
+ AdvanceList();
+}
- for (;;)
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::AdvanceCall: produces the next operand of a call node and advances the state.
+//
+// This function is a bit tricky: in order to avoid doing unnecessary work, it is instantiated with the
+// state number the iterator will be in when it is called. For example, `AdvanceCall<CALL_INSTANCE>`
+// is the instantiation used when the iterator is at the `CALL_INSTANCE` state (i.e. the entry state).
+// This sort of templating allows each state to skip the work for earlier states without unnecessarily
+// duplicating code.
+//
+// Note that this method expands the argument lists (`gtCallArgs` and `gtCallLateArgs`) into their
+// component operands.
+//
+template <int state>
+void GenTreeUseEdgeIterator::AdvanceCall()
+{
+ GenTreeCall* const call = m_node->AsCall();
+
+ switch (state)
{
- switch (m_state)
- {
- case 0:
- m_state = 1;
- m_argList = m_node;
- break;
+ case CALL_INSTANCE:
+ m_argList = call->gtCallArgs;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCall<CALL_ARGS>;
+ if (call->gtCallObjp != nullptr)
+ {
+ m_edge = &call->gtCallObjp;
+ return;
+ }
+ __fallthrough;
- case 1:
- if (m_argList == nullptr)
+ case CALL_ARGS:
+ if (m_argList != nullptr)
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ m_argList = call->gtCallLateArgs;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCall<CALL_LATE_ARGS>;
+ __fallthrough;
+
+ case CALL_LATE_ARGS:
+ if (m_argList != nullptr)
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCall<CALL_CONTROL_EXPR>;
+ __fallthrough;
+
+ case CALL_CONTROL_EXPR:
+ if (call->gtControlExpr != nullptr)
+ {
+ if (call->gtCallType == CT_INDIRECT)
{
- m_state = 2;
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCall<CALL_COOKIE>;
}
else
{
- GenTreeArgList* listNode = m_argList->AsArgList();
- m_edge = &listNode->gtOp1;
- m_argList = listNode->Rest();
- return;
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
}
- break;
+ m_edge = &call->gtControlExpr;
+ return;
+ }
+ else if (call->gtCallType != CT_INDIRECT)
+ {
+ m_state = -1;
+ return;
+ }
+ __fallthrough;
- default:
- m_node = nullptr;
- m_edge = nullptr;
- m_argList = nullptr;
- m_state = -1;
+ case CALL_COOKIE:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_advance = &GenTreeUseEdgeIterator::AdvanceCall<CALL_ADDRESS>;
+ if (call->gtCallCookie != nullptr)
+ {
+ m_edge = &call->gtCallCookie;
return;
- }
+ }
+ __fallthrough;
+
+ case CALL_ADDRESS:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_advance = &GenTreeUseEdgeIterator::Terminate;
+ if (call->gtCallAddr != nullptr)
+ {
+ m_edge = &call->gtCallAddr;
+ }
+ return;
+
+ default:
+ unreached();
}
}
//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator::operator++:
-// Advances the iterator to the next operand.
+// GenTreeUseEdgeIterator::Terminate: advances the iterator to the terminal state.
+//
+void GenTreeUseEdgeIterator::Terminate()
+{
+ m_state = -1;
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::operator++: advances the iterator to the next operand.
//
GenTreeUseEdgeIterator& GenTreeUseEdgeIterator::operator++()
{
- if (m_state == -1)
+ // If we've reached the terminal state, do nothing.
+ if (m_state != -1)
{
- // If we've reached the terminal state, do nothing.
- assert(m_node == nullptr);
- assert(m_edge == nullptr);
- assert(m_argList == nullptr);
- }
- else
- {
- // Otherwise, move to the next operand in the node.
- genTreeOps op = m_node->OperGet();
- if (op == GT_CALL)
- {
- MoveToNextCallUseEdge();
- }
- else if (op == GT_PHI)
- {
- MoveToNextPhiUseEdge();
- }
-#ifdef FEATURE_SIMD
- else if (op == GT_SIMD)
- {
- MoveToNextSIMDUseEdge();
- }
-#endif
- else if (op == GT_FIELD_LIST)
- {
- MoveToNextFieldUseEdge();
- }
- else
- {
- m_edge = GetNextUseEdge();
- if (m_edge != nullptr && *m_edge != nullptr)
- {
- m_state++;
- }
- else
- {
- m_edge = nullptr;
- m_node = nullptr;
- m_state = -1;
- }
- }
+ (this->*m_advance)();
}
return *this;
@@ -11580,39 +11592,40 @@ void Compiler::gtDispTree(GenTreePtr tree,
case GT_CALL:
{
- assert(tree->gtFlags & GTF_CALL);
- unsigned numChildren = tree->NumChildren();
+ GenTreeCall* call = tree->AsCall();
+ assert(call->gtFlags & GTF_CALL);
+ unsigned numChildren = call->NumChildren();
GenTree* lastChild = nullptr;
if (numChildren != 0)
{
- lastChild = tree->GetChild(numChildren - 1);
+ lastChild = call->GetChild(numChildren - 1);
}
- if (tree->gtCall.gtCallType != CT_INDIRECT)
+ if (call->gtCallType != CT_INDIRECT)
{
const char* methodName;
const char* className;
- methodName = eeGetMethodName(tree->gtCall.gtCallMethHnd, &className);
+ methodName = eeGetMethodName(call->gtCallMethHnd, &className);
printf(" %s.%s", className, methodName);
}
- if ((tree->gtFlags & GTF_CALL_UNMANAGED) && (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ if ((call->gtFlags & GTF_CALL_UNMANAGED) && (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
{
printf(" (FramesRoot last use)");
}
- if (((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) && (tree->gtCall.gtInlineCandidateInfo != nullptr) &&
- (tree->gtCall.gtInlineCandidateInfo->exactContextHnd != nullptr))
+ if (((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) && (call->gtInlineCandidateInfo != nullptr) &&
+ (call->gtInlineCandidateInfo->exactContextHnd != nullptr))
{
- printf(" (exactContextHnd=0x%p)", dspPtr(tree->gtCall.gtInlineCandidateInfo->exactContextHnd));
+ printf(" (exactContextHnd=0x%p)", dspPtr(call->gtInlineCandidateInfo->exactContextHnd));
}
- gtDispVN(tree);
- if (tree->IsMultiRegCall())
+ gtDispVN(call);
+ if (call->IsMultiRegCall())
{
- gtDispRegVal(tree);
+ gtDispRegVal(call);
}
printf("\n");
@@ -11623,10 +11636,10 @@ void Compiler::gtDispTree(GenTreePtr tree,
bufp = &buf[0];
- if ((tree->gtCall.gtCallObjp != nullptr) && (tree->gtCall.gtCallObjp->gtOper != GT_NOP) &&
- (!tree->gtCall.gtCallObjp->IsArgPlaceHolderNode()))
+ if ((call->gtCallObjp != nullptr) && (call->gtCallObjp->gtOper != GT_NOP) &&
+ (!call->gtCallObjp->IsArgPlaceHolderNode()))
{
- if (tree->gtCall.gtCallObjp->gtOper == GT_ASG)
+ if (call->gtCallObjp->gtOper == GT_ASG)
{
sprintf_s(bufp, sizeof(buf), "this SETUP%c", 0);
}
@@ -11634,34 +11647,33 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
sprintf_s(bufp, sizeof(buf), "this in %s%c", compRegVarName(REG_ARG_0), 0);
}
- gtDispChild(tree->gtCall.gtCallObjp, indentStack,
- (tree->gtCall.gtCallObjp == lastChild) ? IIArcBottom : IIArc, bufp, topOnly);
+ gtDispChild(call->gtCallObjp, indentStack, (call->gtCallObjp == lastChild) ? IIArcBottom : IIArc,
+ bufp, topOnly);
}
- if (tree->gtCall.gtCallArgs)
+ if (call->gtCallArgs)
{
- gtDispArgList(tree, indentStack);
+ gtDispArgList(call, indentStack);
}
- if (tree->gtCall.gtCallType == CT_INDIRECT)
+ if (call->gtCallType == CT_INDIRECT)
{
- gtDispChild(tree->gtCall.gtCallAddr, indentStack,
- (tree->gtCall.gtCallAddr == lastChild) ? IIArcBottom : IIArc, "calli tgt", topOnly);
+ gtDispChild(call->gtCallAddr, indentStack, (call->gtCallAddr == lastChild) ? IIArcBottom : IIArc,
+ "calli tgt", topOnly);
}
- if (tree->gtCall.gtControlExpr != nullptr)
+ if (call->gtControlExpr != nullptr)
{
- gtDispChild(tree->gtCall.gtControlExpr, indentStack,
- (tree->gtCall.gtControlExpr == lastChild) ? IIArcBottom : IIArc, "control expr",
- topOnly);
+ gtDispChild(call->gtControlExpr, indentStack,
+ (call->gtControlExpr == lastChild) ? IIArcBottom : IIArc, "control expr", topOnly);
}
#if !FEATURE_FIXED_OUT_ARGS
- regList list = tree->gtCall.regArgList;
+ regList list = call->regArgList;
#endif
/* process the late argument list */
int lateArgIndex = 0;
- for (GenTreeArgList* lateArgs = tree->gtCall.gtCallLateArgs; lateArgs;
+ for (GenTreeArgList* lateArgs = call->gtCallLateArgs; lateArgs;
(lateArgIndex++, lateArgs = lateArgs->Rest()))
{
GenTreePtr argx;
@@ -11669,7 +11681,7 @@ void Compiler::gtDispTree(GenTreePtr tree,
argx = lateArgs->Current();
IndentInfo arcType = (lateArgs->Rest() == nullptr) ? IIArcBottom : IIArc;
- gtGetLateArgMsg(tree, argx, lateArgIndex, -1, bufp, sizeof(buf));
+ gtGetLateArgMsg(call, argx, lateArgIndex, -1, bufp, sizeof(buf));
gtDispChild(argx, indentStack, arcType, bufp, topOnly);
}
}
@@ -11787,9 +11799,9 @@ void Compiler::gtDispTree(GenTreePtr tree,
// 'arg' must be an argument to 'call' (else gtArgEntryByNode will assert)
void Compiler::gtGetArgMsg(
- GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength)
+ GenTreeCall* call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength)
{
- if (call->gtCall.gtCallLateArgs != nullptr)
+ if (call->gtCallLateArgs != nullptr)
{
fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(call, argNum);
assert(curArgTabEntry);
@@ -11843,7 +11855,7 @@ void Compiler::gtGetArgMsg(
// 'arg' must be an argument to 'call' (else gtArgEntryByNode will assert)
void Compiler::gtGetLateArgMsg(
- GenTreePtr call, GenTreePtr argx, int lateArgIndex, int listCount, char* bufp, unsigned bufLength)
+ GenTreeCall* call, GenTreePtr argx, int lateArgIndex, int listCount, char* bufp, unsigned bufLength)
{
assert(!argx->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
@@ -11852,8 +11864,8 @@ void Compiler::gtGetLateArgMsg(
regNumber argReg = curArgTabEntry->regNum;
#if !FEATURE_FIXED_OUT_ARGS
- assert(lateArgIndex < call->gtCall.regArgListCount);
- assert(argReg == call->gtCall.regArgList[lateArgIndex]);
+ assert(lateArgIndex < call->regArgListCount);
+ assert(argReg == call->regArgList[lateArgIndex]);
#else
if (argReg == REG_STK)
{
@@ -11908,28 +11920,25 @@ void Compiler::gtGetLateArgMsg(
// gtDispArgList: Dump the tree for a call arg list
//
// Arguments:
-// tree - The call for which 'arg' is an argument
+// call - The call to dump arguments for
// indentStack - the specification for the current level of indentation & arcs
//
// Return Value:
// None.
//
-// Assumptions:
-// 'tree' must be a call node
-
-void Compiler::gtDispArgList(GenTreePtr tree, IndentStack* indentStack)
+void Compiler::gtDispArgList(GenTreeCall* call, IndentStack* indentStack)
{
- GenTree* args = tree->gtCall.gtCallArgs;
+ GenTree* args = call->gtCallArgs;
unsigned argnum = 0;
const int BufLength = 256;
char buf[BufLength];
char* bufp = &buf[0];
- unsigned numChildren = tree->NumChildren();
+ unsigned numChildren = call->NumChildren();
assert(numChildren != 0);
- bool argListIsLastChild = (args == tree->GetChild(numChildren - 1));
+ bool argListIsLastChild = (args == call->GetChild(numChildren - 1));
IndentInfo arcType = IIArc;
- if (tree->gtCall.gtCallObjp != nullptr)
+ if (call->gtCallObjp != nullptr)
{
argnum++;
}
@@ -11940,7 +11949,7 @@ void Compiler::gtDispArgList(GenTreePtr tree, IndentStack* indentStack)
GenTree* arg = args->gtOp.gtOp1;
if (!arg->IsNothingNode() && !arg->IsArgPlaceHolderNode())
{
- gtGetArgMsg(tree, arg, argnum, -1, bufp, BufLength);
+ gtGetArgMsg(call, arg, argnum, -1, bufp, BufLength);
if (argListIsLastChild && (args->gtOp.gtOp2 == nullptr))
{
arcType = IIArcBottom;
@@ -12008,16 +12017,23 @@ void Compiler::gtDispTreeRange(LIR::Range& containingRange, GenTree* tree)
//
// Arguments:
// node - the LIR node to dump.
+// prefixMsg - an optional prefix for each line of output.
//
-void Compiler::gtDispLIRNode(GenTree* node)
+void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr */)
{
- auto displayOperand = [](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack) {
+ auto displayOperand = [](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack,
+ size_t prefixIndent) {
assert(operand != nullptr);
assert(message != nullptr);
+ if (prefixIndent != 0)
+ {
+ printf("%*s", (int)prefixIndent, "");
+ }
+
// 49 spaces for alignment
printf("%-49s", "");
-#ifdef FEATURE_SET_FLAGS
+#if FEATURE_SET_FLAGS
// additional flag enlarges the flag field by one character
printf(" ");
#endif
@@ -12028,11 +12044,16 @@ void Compiler::gtDispLIRNode(GenTree* node)
operandArc = IIArc;
printf(" t%-5d %-6s %s\n", operand->gtTreeID, varTypeName(operand->TypeGet()), message);
-
};
IndentStack indentStack(this);
+ size_t prefixIndent = 0;
+ if (prefixMsg != nullptr)
+ {
+ prefixIndent = strlen(prefixMsg);
+ }
+
const int bufLength = 256;
char buf[bufLength];
@@ -12054,19 +12075,19 @@ void Compiler::gtDispLIRNode(GenTree* node)
if (operand == call->gtCallObjp)
{
sprintf_s(buf, sizeof(buf), "this in %s", compRegVarName(REG_ARG_0));
- displayOperand(operand, buf, operandArc, indentStack);
+ displayOperand(operand, buf, operandArc, indentStack, prefixIndent);
}
else if (operand == call->gtCallAddr)
{
- displayOperand(operand, "calli tgt", operandArc, indentStack);
+ displayOperand(operand, "calli tgt", operandArc, indentStack, prefixIndent);
}
else if (operand == call->gtControlExpr)
{
- displayOperand(operand, "control expr", operandArc, indentStack);
+ displayOperand(operand, "control expr", operandArc, indentStack, prefixIndent);
}
else if (operand == call->gtCallCookie)
{
- displayOperand(operand, "cookie", operandArc, indentStack);
+ displayOperand(operand, "cookie", operandArc, indentStack, prefixIndent);
}
else
{
@@ -12088,7 +12109,7 @@ void Compiler::gtDispLIRNode(GenTree* node)
gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, listIndex, buf, sizeof(buf));
}
- displayOperand(operand, buf, operandArc, indentStack);
+ displayOperand(operand, buf, operandArc, indentStack, prefixIndent);
operandArc = IIArc;
}
}
@@ -12103,7 +12124,7 @@ void Compiler::gtDispLIRNode(GenTree* node)
gtGetLateArgMsg(call, operand, curArgTabEntry->lateArgInx, -1, buf, sizeof(buf));
}
- displayOperand(operand, buf, operandArc, indentStack);
+ displayOperand(operand, buf, operandArc, indentStack, prefixIndent);
}
}
}
@@ -12111,55 +12132,59 @@ void Compiler::gtDispLIRNode(GenTree* node)
{
if (operand == node->AsBlk()->Addr())
{
- displayOperand(operand, "lhs", operandArc, indentStack);
+ displayOperand(operand, "lhs", operandArc, indentStack, prefixIndent);
}
else if (operand == node->AsBlk()->Data())
{
- displayOperand(operand, "rhs", operandArc, indentStack);
+ displayOperand(operand, "rhs", operandArc, indentStack, prefixIndent);
}
else
{
assert(operand == node->AsDynBlk()->gtDynamicSize);
- displayOperand(operand, "size", operandArc, indentStack);
+ displayOperand(operand, "size", operandArc, indentStack, prefixIndent);
}
}
else if (node->OperGet() == GT_DYN_BLK)
{
if (operand == node->AsBlk()->Addr())
{
- displayOperand(operand, "lhs", operandArc, indentStack);
+ displayOperand(operand, "lhs", operandArc, indentStack, prefixIndent);
}
else
{
assert(operand == node->AsDynBlk()->gtDynamicSize);
- displayOperand(operand, "size", operandArc, indentStack);
+ displayOperand(operand, "size", operandArc, indentStack, prefixIndent);
}
}
else if (node->OperIsAssignment())
{
if (operand == node->gtGetOp1())
{
- displayOperand(operand, "lhs", operandArc, indentStack);
+ displayOperand(operand, "lhs", operandArc, indentStack, prefixIndent);
}
else
{
- displayOperand(operand, "rhs", operandArc, indentStack);
+ displayOperand(operand, "rhs", operandArc, indentStack, prefixIndent);
}
}
else
{
- displayOperand(operand, "", operandArc, indentStack);
+ displayOperand(operand, "", operandArc, indentStack, prefixIndent);
}
operandArc = IIArc;
}
// Visit the operator
+
+ if (prefixMsg != nullptr)
+ {
+ printf("%s", prefixMsg);
+ }
+
const bool topOnly = true;
const bool isLIR = true;
gtDispTree(node, &indentStack, nullptr, topOnly, isLIR);
-
- printf("\n");
}
/*****************************************************************************/
@@ -13410,15 +13435,10 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
#endif
#ifdef _TARGET_64BIT_
- // we need to properly re-sign-extend or truncate as needed.
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- i1 = UINT32(i1);
- }
- else
- {
- i1 = INT32(i1);
- }
+ // Some operations are performed as 64-bit instead of 32-bit, so the upper 32 bits
+ // need to be discarded. Since constant values are stored as ssize_t and the node
+ // has TYP_INT, the result needs to be sign-extended rather than zero-extended.
+ i1 = INT32(i1);
#endif // _TARGET_64BIT_
/* Also all conditional folding jumps here since the node hanging from
@@ -14927,7 +14947,7 @@ bool Compiler::gtCanOptimizeTypeEquality(GenTreePtr tree)
{
if (tree->gtCall.gtCallType == CT_HELPER)
{
- if (gtIsTypeHandleToRuntimeTypeHelper(tree))
+ if (gtIsTypeHandleToRuntimeTypeHelper(tree->AsCall()))
{
return true;
}
@@ -14958,10 +14978,10 @@ bool Compiler::gtCanOptimizeTypeEquality(GenTreePtr tree)
return false;
}
-bool Compiler::gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree)
+bool Compiler::gtIsTypeHandleToRuntimeTypeHelper(GenTreeCall* call)
{
- return tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE) ||
- tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL);
+ return call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL);
}
bool Compiler::gtIsActiveCSE_Candidate(GenTreePtr tree)
@@ -15787,6 +15807,20 @@ unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
}
//------------------------------------------------------------------------
+// canBeContained: check whether this tree node may be a subcomponent of its parent for purposes
+// of code generation.
+//
+// Return value: returns true if it is possible to contain this node and false otherwise.
+bool GenTree::canBeContained() const
+{
+ assert(IsLIR());
+
+ // It is not possible for nodes that do not produce values or that are not containable values
+ // to be contained.
+ return (OperKind() & (GTK_NOVALUE | GTK_NOCONTAIN)) == 0;
+}
+
+//------------------------------------------------------------------------
// isContained: check whether this tree node is a subcomponent of its parent for codegen purposes
//
// Return Value:
@@ -15801,14 +15835,16 @@ unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
//
bool GenTree::isContained() const
{
- if (gtHasReg())
+ assert(IsLIR());
+
+ if (!canBeContained() || gtHasReg())
{
return false;
}
// these actually produce a register (the flags reg, we just don't model it)
// and are a separate instruction from the branch that consumes the result
- if (OperKind() & GTK_RELOP)
+ if ((OperKind() & GTK_RELOP) != 0)
{
return false;
}
@@ -15819,75 +15855,25 @@ bool GenTree::isContained() const
return false;
}
- switch (OperGet())
- {
- case GT_STOREIND:
- case GT_JTRUE:
- case GT_JCC:
- case GT_RETURN:
- case GT_RETFILT:
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
- case GT_ARR_BOUNDS_CHECK:
- case GT_LOCKADD:
- case GT_NOP:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- case GT_RETURNTRAP:
- case GT_COMMA:
- case GT_PINVOKE_PROLOG:
- case GT_PHYSREGDST:
- case GT_PUTARG_STK:
- case GT_MEMORYBARRIER:
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- case GT_SWITCH:
-#ifndef LEGACY_BACKEND
- case GT_JMPTABLE:
-#endif
- case GT_SWITCH_TABLE:
- case GT_SWAP:
- case GT_LCLHEAP:
- case GT_CKFINITE:
- case GT_JMP:
- case GT_IL_OFFSET:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
-#endif
- return false;
-
#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
- case GT_LONG:
- // GT_LONG nodes are normally contained. The only exception is when the result
- // of a TYP_LONG operation is not used and this can only happen if the GT_LONG
- // is the last node in the statement (in linear order).
- return gtNext != nullptr;
+ if (OperGet() == GT_LONG)
+ {
+ // GT_LONG nodes are normally contained. The only exception is when the result
+ // of a TYP_LONG operation is not used and this can only happen if the GT_LONG
+ // is the last node in the statement (in linear order).
+ return gtNext != nullptr;
+ }
#endif
- case GT_CALL:
- // Note: if you hit this assert you are probably calling isContained()
- // before the LSRA phase has allocated physical register to the tree nodes
- //
- assert(gtType == TYP_VOID);
- return false;
-
- default:
- // if it's contained it better have a parent
- assert(gtNext || OperIsLocal());
- return true;
- }
+ // if it's contained, it had better have a user
+ assert((gtNext != nullptr) || OperIsLocal());
+ return true;
}
// return true if node is contained and an indir
bool GenTree::isContainedIndir() const
{
- return isContained() && isIndir();
+ return isIndir() && isContained();
}
bool GenTree::isIndirAddrMode()
@@ -15989,11 +15975,7 @@ size_t GenTreeIndir::Offset()
bool GenTreeIntConCommon::ImmedValNeedsReloc(Compiler* comp)
{
-#ifdef RELOC_SUPPORT
return comp->opts.compReloc && (gtOper == GT_CNS_INT) && IsIconHandle();
-#else
- return false;
-#endif
}
//------------------------------------------------------------------------
@@ -16089,9 +16071,7 @@ bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp)
#endif
#endif //! LEGACY_BACKEND
- // TODO-x86 - TLS field handles are excluded for now as they are accessed relative to FS segment.
- // Handling of TLS field handles is a NYI and this needs to be relooked after implementing it.
- return IsCnsIntOrI() && !IsIconHandle(GTF_ICON_TLS_HDL);
+ return IsCnsIntOrI();
}
// Returns true if this icon value is encoded as addr needs recording a relocation with VM
@@ -16371,6 +16351,215 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandle(GenTree* tree)
return structHnd;
}
+//------------------------------------------------------------------------
+// gtGetClassHandle: find class handle for a ref type
+//
+// Arguments:
+// tree -- tree to find handle for
+// isExact [out] -- whether handle is exact type
+// isNonNull [out] -- whether tree value is known not to be null
+//
+// Return Value:
+// nullptr if class handle is unknown,
+// otherwise the class handle.
+// isExact set true if tree type is known to be exactly the handle type,
+// otherwise actual type may be a subtype.
+// isNonNull set true if tree value is known not to be null,
+// otherwise a null value is possible.
+
+CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTreePtr tree, bool* isExact, bool* isNonNull)
+{
+ // Set default values for our out params.
+ *isNonNull = false;
+ *isExact = false;
+ CORINFO_CLASS_HANDLE objClass = nullptr;
+
+ // Bail out if we're just importing and not generating code, since
+ // the jit uses TYP_REF for CORINFO_TYPE_VAR locals and args, but
+ // these may not be ref types.
+ if (compIsForImportOnly())
+ {
+ return objClass;
+ }
+
+ // Bail out if the tree is not a ref type.
+ var_types treeType = tree->TypeGet();
+ if (treeType != TYP_REF)
+ {
+ return objClass;
+ }
+
+ // Tunnel through commas.
+ GenTreePtr obj = tree->gtEffectiveVal(false);
+ const genTreeOps objOp = obj->OperGet();
+
+ switch (objOp)
+ {
+ case GT_COMMA:
+ {
+ // gtEffectiveVal above means we shouldn't see commas here.
+ assert(!"unexpected GT_COMMA");
+ break;
+ }
+
+ case GT_LCL_VAR:
+ {
+ // For locals, pick up type info from the local table.
+ const unsigned objLcl = obj->AsLclVar()->GetLclNum();
+
+ objClass = lvaTable[objLcl].lvClassHnd;
+ *isExact = lvaTable[objLcl].lvClassIsExact;
+ break;
+ }
+
+ case GT_FIELD:
+ {
+ // For fields, get the type from the field handle.
+ CORINFO_FIELD_HANDLE fieldHnd = obj->gtField.gtFldHnd;
+
+ if (fieldHnd != nullptr)
+ {
+ CORINFO_CLASS_HANDLE fieldClass = nullptr;
+ CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &fieldClass);
+ if (fieldCorType == CORINFO_TYPE_CLASS)
+ {
+ objClass = fieldClass;
+ }
+ }
+
+ break;
+ }
+
+ case GT_RET_EXPR:
+ {
+ // If we see a RET_EXPR, recurse through to examine the
+ // return value expression.
+ GenTreePtr retExpr = tree->gtRetExpr.gtInlineCandidate;
+ objClass = gtGetClassHandle(retExpr, isExact, isNonNull);
+ break;
+ }
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ if (call->IsInlineCandidate())
+ {
+ // For inline candidates, we've already cached the return
+ // type class handle in the inline info.
+ InlineCandidateInfo* inlInfo = call->gtInlineCandidateInfo;
+ assert(inlInfo != nullptr);
+
+ // Grab it as our first cut at a return type.
+ assert(inlInfo->methInfo.args.retType == CORINFO_TYPE_CLASS);
+ objClass = inlInfo->methInfo.args.retTypeClass;
+
+ // If the method is shared, the above may not capture
+ // the most precise return type information (that is,
+ // it may represent a shared return type and as such,
+ // have instances of __Canon). See if we can use the
+ // context to get at something more definite.
+ //
+ // For now, we do this here on demand rather than when
+ // processing the call, but we could/should apply
+ // similar sharpening to the argument and local types
+ // of the inlinee.
+ const unsigned retClassFlags = info.compCompHnd->getClassAttribs(objClass);
+ if (retClassFlags & CORINFO_FLG_SHAREDINST)
+ {
+ CORINFO_CONTEXT_HANDLE context = inlInfo->exactContextHnd;
+
+ if (context != nullptr)
+ {
+ CORINFO_CLASS_HANDLE exactClass = nullptr;
+
+ if (((size_t)context & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS)
+ {
+ exactClass = (CORINFO_CLASS_HANDLE)((size_t)context & ~CORINFO_CONTEXTFLAGS_MASK);
+ }
+ else
+ {
+ CORINFO_METHOD_HANDLE exactMethod =
+ (CORINFO_METHOD_HANDLE)((size_t)context & ~CORINFO_CONTEXTFLAGS_MASK);
+ exactClass = info.compCompHnd->getMethodClass(exactMethod);
+ }
+
+ // Grab the signature in this context.
+ CORINFO_SIG_INFO sig;
+ eeGetMethodSig(call->gtCallMethHnd, &sig, exactClass);
+ assert(sig.retType == CORINFO_TYPE_CLASS);
+ objClass = sig.retTypeClass;
+ }
+ }
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ // For user calls, we can fetch the approximate return
+ // type info from the method handle. Unfortunately
+ // we've lost the exact context, so this is the best
+ // we can do for now.
+ CORINFO_METHOD_HANDLE method = call->gtCallMethHnd;
+ CORINFO_CLASS_HANDLE exactClass = nullptr;
+ CORINFO_SIG_INFO sig;
+ eeGetMethodSig(method, &sig, exactClass);
+ if (sig.retType == CORINFO_TYPE_VOID)
+ {
+ // This is a constructor call.
+ const unsigned methodFlags = info.compCompHnd->getMethodAttribs(method);
+ assert((methodFlags & CORINFO_FLG_CONSTRUCTOR) != 0);
+ objClass = info.compCompHnd->getMethodClass(method);
+ *isExact = true;
+ *isNonNull = true;
+ }
+ else
+ {
+ assert(sig.retType == CORINFO_TYPE_CLASS);
+ objClass = sig.retTypeClass;
+ }
+ }
+
+ break;
+ }
+
+ case GT_CNS_STR:
+ {
+ // For literal strings, we know the class and that the
+ // value is not null.
+ objClass = impGetStringClass();
+ *isExact = true;
+ *isNonNull = true;
+ break;
+ }
+
+ case GT_IND:
+ {
+ // indir(addr(lcl)) --> lcl
+ //
+ // This comes up during constrained callvirt on ref types.
+ GenTreeIndir* indir = obj->AsIndir();
+ if (indir->HasBase() && !indir->HasIndex())
+ {
+ GenTreePtr base = indir->Base();
+ GenTreeLclVarCommon* lcl = base->IsLocalAddrExpr();
+
+ if ((lcl != nullptr) && (base->OperGet() != GT_ADD))
+ {
+ const unsigned objLcl = lcl->GetLclNum();
+ objClass = lvaTable[objLcl].lvClassHnd;
+ *isExact = lvaTable[objLcl].lvClassIsExact;
+ }
+ }
+ break;
+ }
+
+ default:
+ {
+ break;
+ }
+ }
+
+ return objClass;
+}
+
void GenTree::ParseArrayAddress(
Compiler* comp, ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq)
{
@@ -17062,7 +17251,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA
//
void ReturnTypeDesc::InitializeLongReturnType(Compiler* comp)
{
-#if defined(_TARGET_X86_)
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
// Sets up a ReturnTypeDesc for returning a long using two registers
//
@@ -17070,11 +17259,11 @@ void ReturnTypeDesc::InitializeLongReturnType(Compiler* comp)
m_regType[0] = TYP_INT;
m_regType[1] = TYP_INT;
-#else // not _TARGET_X86_
+#else // not (_TARGET_X86_ or _TARGET_ARM_)
m_regType[0] = TYP_LONG;
-#endif // _TARGET_X86_
+#endif // _TARGET_X86_ or _TARGET_ARM_
#ifdef DEBUG
m_inited = true;
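For context on the x86/ARM two-register path enabled above: returning a TYP_LONG in two TYP_INT registers simply means the 64-bit value is delivered as its low and high 32-bit halves (typically EAX/EDX on x86 and r0/r1 on ARM). A small, JIT-independent sketch of that split:

#include <cstdint>
#include <cstdio>

// Split a 64-bit value into the two 32-bit halves that a two-register long
// return occupies; the low half goes in the first return register.
static void splitLongReturn(int64_t value, uint32_t* lo, uint32_t* hi)
{
    *lo = (uint32_t)((uint64_t)value & 0xFFFFFFFFu);
    *hi = (uint32_t)((uint64_t)value >> 32);
}

int main()
{
    uint32_t lo, hi;
    splitLongReturn(0x1122334455667788LL, &lo, &hi);
    printf("lo=0x%08X hi=0x%08X\n", lo, hi); // lo=0x55667788 hi=0x11223344
    return 0;
}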
@@ -17150,7 +17339,7 @@ regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx)
}
}
-#elif defined(_TARGET_X86_)
+#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
if (idx == 0)
{
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 0ea8321e77..1d52248657 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -120,6 +120,8 @@ enum genTreeKinds
GTK_NOVALUE = 0x0400, // node does not produce a value
GTK_NOTLIR = 0x0800, // node is not allowed in LIR
+ GTK_NOCONTAIN = 0x1000, // this node is a value, but may not be contained
+
/* Define composite value(s) */
GTK_SMPOP = (GTK_UNOP | GTK_BINOP | GTK_RELOP | GTK_LOGOP)
@@ -147,6 +149,61 @@ struct BasicBlock;
struct InlineCandidateInfo;
+typedef unsigned short AssertionIndex;
+
+static const AssertionIndex NO_ASSERTION_INDEX = 0;
+
+class AssertionInfo
+{
+ // true if the assertion holds on the bbNext edge instead of the bbJumpDest edge (for GT_JTRUE nodes)
+ unsigned short m_isNextEdgeAssertion : 1;
+ // 1-based index of the assertion
+ unsigned short m_assertionIndex : 15;
+
+ AssertionInfo(bool isNextEdgeAssertion, AssertionIndex assertionIndex)
+ : m_isNextEdgeAssertion(isNextEdgeAssertion), m_assertionIndex(assertionIndex)
+ {
+ assert(m_assertionIndex == assertionIndex);
+ }
+
+public:
+ AssertionInfo() : AssertionInfo(false, 0)
+ {
+ }
+
+ AssertionInfo(AssertionIndex assertionIndex) : AssertionInfo(false, assertionIndex)
+ {
+ }
+
+ static AssertionInfo ForNextEdge(AssertionIndex assertionIndex)
+ {
+ // Ignore the edge information if there's no assertion
+ bool isNextEdge = (assertionIndex != NO_ASSERTION_INDEX);
+ return AssertionInfo(isNextEdge, assertionIndex);
+ }
+
+ void Clear()
+ {
+ m_isNextEdgeAssertion = 0;
+ m_assertionIndex = NO_ASSERTION_INDEX;
+ }
+
+ bool HasAssertion() const
+ {
+ return m_assertionIndex != NO_ASSERTION_INDEX;
+ }
+
+ AssertionIndex GetAssertionIndex() const
+ {
+ return m_assertionIndex;
+ }
+
+ bool IsNextEdgeAssertion() const
+ {
+ return m_isNextEdgeAssertion;
+ }
+};
+
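A quick illustration of how the new AssertionInfo packing behaves: the 15-bit index and the next-edge bit together still occupy the single unsigned short that gtAssertionNum used to take. The class below is a standalone copy made just for this demonstration, not the declaration the JIT actually compiles.

#include <cassert>
#include <cstdio>

typedef unsigned short AssertionIndex;
static const AssertionIndex NO_ASSERTION_INDEX = 0;

// Standalone copy of the AssertionInfo shape above, for demonstration only.
class AssertionInfo
{
    unsigned short m_isNextEdgeAssertion : 1; // assertion applies to the bbNext edge
    unsigned short m_assertionIndex : 15;     // 1-based assertion table index

    AssertionInfo(bool isNextEdge, AssertionIndex index)
        : m_isNextEdgeAssertion(isNextEdge), m_assertionIndex(index)
    {
        assert(m_assertionIndex == index); // the index must fit in 15 bits
    }

public:
    AssertionInfo() : AssertionInfo(false, NO_ASSERTION_INDEX)
    {
    }
    AssertionInfo(AssertionIndex index) : AssertionInfo(false, index)
    {
    }
    static AssertionInfo ForNextEdge(AssertionIndex index)
    {
        // No edge information is recorded when there is no assertion.
        return AssertionInfo(index != NO_ASSERTION_INDEX, index);
    }
    bool HasAssertion() const
    {
        return m_assertionIndex != NO_ASSERTION_INDEX;
    }
    bool IsNextEdgeAssertion() const
    {
        return m_isNextEdgeAssertion != 0;
    }
    AssertionIndex GetAssertionIndex() const
    {
        return m_assertionIndex;
    }
};

int main()
{
    AssertionInfo onJump(42);                             // holds on the bbJumpDest edge
    AssertionInfo onNext = AssertionInfo::ForNextEdge(7); // holds on the bbNext edge
    AssertionInfo none   = AssertionInfo::ForNextEdge(NO_ASSERTION_INDEX);

    printf("%u %d | %u %d | has=%d\n", onJump.GetAssertionIndex(), onJump.IsNextEdgeAssertion(),
           onNext.GetAssertionIndex(), onNext.IsNextEdgeAssertion(), none.HasAssertion());
    // prints: 42 0 | 7 1 | has=0
    return 0;
}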
/*****************************************************************************/
// GT_FIELD nodes will be lowered into more "code-gen-able" representations, like
@@ -394,28 +451,27 @@ struct GenTree
unsigned char gtLIRFlags; // Used for nodes that are in LIR. See LIR::Flags in lir.h for the various flags.
#if ASSERTION_PROP
- unsigned short gtAssertionNum; // 0 or Assertion table index
- // valid only for non-GT_STMT nodes
+ AssertionInfo gtAssertionInfo; // valid only for non-GT_STMT nodes
- bool HasAssertion() const
+ bool GeneratesAssertion() const
{
- return gtAssertionNum != 0;
+ return gtAssertionInfo.HasAssertion();
}
+
void ClearAssertion()
{
- gtAssertionNum = 0;
+ gtAssertionInfo.Clear();
}
- unsigned short GetAssertion() const
+ AssertionInfo GetAssertionInfo() const
{
- return gtAssertionNum;
+ return gtAssertionInfo;
}
- void SetAssertion(unsigned short value)
+
+ void SetAssertionInfo(AssertionInfo info)
{
- assert((unsigned short)value == value);
- gtAssertionNum = (unsigned short)value;
+ gtAssertionInfo = info;
}
-
#endif
#if FEATURE_STACK_FP_X87
@@ -555,6 +611,8 @@ public:
__declspec(property(get = GetRegNum, put = SetRegNum)) regNumber gtRegNum;
+ bool canBeContained() const;
+
// for codegen purposes, is this node a subnode of its parent
bool isContained() const;
@@ -1852,6 +1910,10 @@ public:
{
return (gtFlags & GTF_REVERSE_OPS) ? true : false;
}
+ bool IsUnsigned() const
+ {
+ return ((gtFlags & GTF_UNSIGNED) != 0);
+ }
inline bool IsCnsIntOrI() const;
@@ -2051,19 +2113,22 @@ public:
};
//------------------------------------------------------------------------
-// GenTreeUseEdgeIterator: an iterator that will produce each use edge of a
-// GenTree node in the order in which they are
-// used. Note that the use edges of a node may not
-// correspond exactly to the nodes on the other
-// ends of its use edges: in particular, GT_LIST
-// nodes are expanded into their component parts
-// (with the optional exception of multi-reg
-// arguments). This differs from the behavior of
-// GenTree::GetChildPointer(), which does not expand
-// lists.
+// GenTreeUseEdgeIterator: an iterator that will produce each use edge of a GenTree node in the order in which
+// they are used.
//
-// Note: valid values of this type may be obtained by calling
-// `GenTree::UseEdgesBegin` and `GenTree::UseEdgesEnd`.
+// The use edges of a node may not correspond exactly to the nodes on the other ends of its use edges: in
+// particular, GT_LIST nodes are expanded into their component parts. This differs from the behavior of
+// GenTree::GetChildPointer(), which does not expand lists.
+//
+// Operand iteration is common enough in the back end of the compiler that the implementation of this type has
+// traded some simplicity for speed:
+// - As much work as is reasonable is done in the constructor rather than during operand iteration
+// - Node-specific functionality is handled by a small class of "advance" functions called by operator++
+// rather than making operator++ itself handle all nodes
+// - Some specialization has been performed for specific node types/shapes (e.g. the advance function for
+// binary nodes is specialized based on whether or not the node has the GTF_REVERSE_OPS flag set)
+//
+// Valid values of this type may be obtained by calling `GenTree::UseEdgesBegin` and `GenTree::UseEdgesEnd`.
//
class GenTreeUseEdgeIterator final
{
@@ -2071,6 +2136,20 @@ class GenTreeUseEdgeIterator final
friend GenTreeUseEdgeIterator GenTree::UseEdgesBegin();
friend GenTreeUseEdgeIterator GenTree::UseEdgesEnd();
+ enum
+ {
+ CALL_INSTANCE = 0,
+ CALL_ARGS = 1,
+ CALL_LATE_ARGS = 2,
+ CALL_CONTROL_EXPR = 3,
+ CALL_COOKIE = 4,
+ CALL_ADDRESS = 5,
+ CALL_TERMINAL = 6,
+ };
+
+ typedef void (GenTreeUseEdgeIterator::*AdvanceFn)();
+
+ AdvanceFn m_advance;
GenTree* m_node;
GenTree** m_edge;
GenTree* m_argList;
@@ -2078,24 +2157,40 @@ class GenTreeUseEdgeIterator final
GenTreeUseEdgeIterator(GenTree* node);
- GenTree** GetNextUseEdge() const;
- void MoveToNextCallUseEdge();
- void MoveToNextPhiUseEdge();
-#ifdef FEATURE_SIMD
- void MoveToNextSIMDUseEdge();
-#endif
- void MoveToNextFieldUseEdge();
+ // Advance functions for special nodes
+ void AdvanceCmpXchg();
+ void AdvanceBoundsChk();
+ void AdvanceArrElem();
+ void AdvanceArrOffset();
+ void AdvanceDynBlk();
+ void AdvanceStoreDynBlk();
+
+ template <bool ReverseOperands>
+ void AdvanceBinOp();
+ void SetEntryStateForBinOp();
+
+ // An advance function for list-like nodes (Phi, SIMDIntrinsicInitN, FieldList)
+ void AdvanceList();
+ void SetEntryStateForList(GenTree* list);
+
+ // The advance function for call nodes
+ template <int state>
+ void AdvanceCall();
+
+ void Terminate();
public:
GenTreeUseEdgeIterator();
inline GenTree** operator*()
{
+ assert(m_state != -1);
return m_edge;
}
inline GenTree** operator->()
{
+ assert(m_state != -1);
return m_edge;
}
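The rewritten header comment above describes the iterator's design: each node shape gets its own "advance" member function, stored in a pointer-to-member, so operator++ is a single indirect call instead of a switch over every node kind. The toy iterator below uses the same pattern on something unrelated to GenTree; all names here are invented for illustration.

#include <cstdio>

// Toy illustration of the pointer-to-member "advance function" pattern.
class DigitIterator
{
    typedef void (DigitIterator::*AdvanceFn)();

    AdvanceFn m_advance;
    int       m_value;

    // Advance function used while digits remain.
    void AdvanceDigit()
    {
        m_value /= 10;
        if (m_value == 0)
        {
            Terminate();
        }
    }

    // Advance function for the end state: does nothing.
    void AdvanceTerminal()
    {
    }

    void Terminate()
    {
        m_advance = &DigitIterator::AdvanceTerminal;
        m_value   = -1;
    }

public:
    explicit DigitIterator(int value) : m_advance(&DigitIterator::AdvanceDigit), m_value(value)
    {
        if (value == 0)
        {
            Terminate();
        }
    }

    bool Done() const
    {
        return m_value == -1;
    }

    int operator*() const
    {
        return m_value % 10;
    }

    DigitIterator& operator++()
    {
        (this->*m_advance)(); // one indirect call; no per-kind switch
        return *this;
    }
};

int main()
{
    for (DigitIterator it(90210); !it.Done(); ++it)
    {
        printf("%d ", *it); // prints 0 1 2 0 9
    }
    printf("\n");
    return 0;
}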
@@ -3390,7 +3485,7 @@ struct GenTreeCall final : public GenTree
//
bool HasMultiRegRetVal() const
{
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
// LEGACY_BACKEND does not use multi reg returns for calls with long return types
return varTypeIsLong(gtType);
#elif FEATURE_MULTIREG_RET
@@ -3620,8 +3715,7 @@ struct GenTreeFptrVal : public GenTree
CORINFO_METHOD_HANDLE gtFptrMethod;
#ifdef FEATURE_READYTORUN_COMPILER
- CORINFO_CONST_LOOKUP gtEntryPoint;
- CORINFO_RESOLVED_TOKEN* gtLdftnResolvedToken;
+ CORINFO_CONST_LOOKUP gtEntryPoint;
#endif
GenTreeFptrVal(var_types type, CORINFO_METHOD_HANDLE meth) : GenTree(GT_FTN_ADDR, type), gtFptrMethod(meth)
@@ -4071,6 +4165,7 @@ struct GenTreeAddrMode : public GenTreeOp
GenTreeAddrMode(var_types type, GenTreePtr base, GenTreePtr index, unsigned scale, unsigned offset)
: GenTreeOp(GT_LEA, type, base, index)
{
+ assert(base != nullptr || index != nullptr);
gtScale = scale;
gtOffset = offset;
}
@@ -4571,7 +4666,7 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
#endif
};
-/* gtPutArgStk -- Argument passed on stack */
+/* gtPutArgStk -- Argument passed on stack (GT_PUTARG_STK) */
struct GenTreePutArgStk : public GenTreeUnOp
{
@@ -4580,105 +4675,58 @@ struct GenTreePutArgStk : public GenTreeUnOp
unsigned gtPadAlign; // Number of padding slots for stack alignment
#endif
-#if FEATURE_FASTTAILCALL
- bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
- // By default this is false and will be placed in out-going arg area.
- // Fast tail calls set this to true.
- // In future if we need to add more such bool fields consider bit fields.
-
- GenTreePutArgStk(genTreeOps oper,
- var_types type,
- unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
- PUT_STRUCT_ARG_STK_ONLY_ARG(bool isStruct),
- bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
- DEBUGARG(bool largeNode = false))
- : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ // Don't let clang-format mess with the GenTreePutArgStk constructor.
+ // clang-format off
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum
+ PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots),
+ bool putInIncomingArgArea = false,
+ GenTreeCall* callNode = nullptr)
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(/*largeNode*/ false))
, gtSlotNum(slotNum)
#if defined(UNIX_X86_ABI)
, gtPadAlign(0)
#endif
- , putInIncomingArgArea(_putInIncomingArgArea)
+#if FEATURE_FASTTAILCALL
+ , gtPutInIncomingArgArea(putInIncomingArgArea)
+#endif // FEATURE_FASTTAILCALL
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
, gtNumberReferenceSlots(0)
, gtGcPtrs(nullptr)
#endif // FEATURE_PUT_STRUCT_ARG_STK
- {
-#ifdef DEBUG
- gtCall = callNode;
+#if defined(DEBUG) || defined(UNIX_X86_ABI)
+ , gtCall(callNode)
#endif
+ {
}
- GenTreePutArgStk(genTreeOps oper,
- var_types type,
- GenTreePtr op1,
- unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots),
- bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
- DEBUGARG(bool largeNode = false))
- : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
- , gtSlotNum(slotNum)
-#if defined(UNIX_X86_ABI)
- , gtPadAlign(0)
-#endif
- , putInIncomingArgArea(_putInIncomingArgArea)
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- , gtPutArgStkKind(Kind::Invalid)
- , gtNumSlots(numSlots)
- , gtNumberReferenceSlots(0)
- , gtGcPtrs(nullptr)
-#endif // FEATURE_PUT_STRUCT_ARG_STK
+// clang-format on
+
+#if FEATURE_FASTTAILCALL
+
+ bool gtPutInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
+ // By default this is false and will be placed in out-going arg area.
+ // Fast tail calls set this to true.
+ // If we need to add more such bool fields in the future, consider bit fields.
+
+ bool putInIncomingArgArea() const
{
-#ifdef DEBUG
- gtCall = callNode;
-#endif
+ return gtPutInIncomingArgArea;
}
#else // !FEATURE_FASTTAILCALL
- GenTreePutArgStk(genTreeOps oper,
- var_types type,
- unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
- DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
- : GenTreeUnOp(oper, type DEBUGARG(largeNode))
- , gtSlotNum(slotNum)
-#if defined(UNIX_X86_ABI)
- , gtPadAlign(0)
-#endif
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- , gtPutArgStkKind(Kind::Invalid)
- , gtNumSlots(numSlots)
- , gtNumberReferenceSlots(0)
- , gtGcPtrs(nullptr)
-#endif // FEATURE_PUT_STRUCT_ARG_STK
+ bool putInIncomingArgArea() const
{
-#ifdef DEBUG
- gtCall = callNode;
-#endif
+ return false;
}
- GenTreePutArgStk(genTreeOps oper,
- var_types type,
- GenTreePtr op1,
- unsigned slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(unsigned numSlots)
- DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
- : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
- , gtSlotNum(slotNum)
-#if defined(UNIX_X86_ABI)
- , gtPadAlign(0)
-#endif
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- , gtPutArgStkKind(Kind::Invalid)
- , gtNumSlots(numSlots)
- , gtNumberReferenceSlots(0)
- , gtGcPtrs(nullptr)
-#endif // FEATURE_PUT_STRUCT_ARG_STK
- {
-#ifdef DEBUG
- gtCall = callNode;
-#endif
- }
-#endif // FEATURE_FASTTAILCALL
+#endif // !FEATURE_FASTTAILCALL
unsigned getArgOffset()
{
@@ -4698,13 +4746,12 @@ struct GenTreePutArgStk : public GenTreeUnOp
#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
+
unsigned getArgSize()
{
return gtNumSlots * TARGET_POINTER_SIZE;
}
-#endif // FEATURE_PUT_STRUCT_ARG_STK
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
// setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
//
@@ -4726,13 +4773,7 @@ struct GenTreePutArgStk : public GenTreeUnOp
gtNumberReferenceSlots = numPointers;
gtGcPtrs = pointers;
}
-#endif // FEATURE_PUT_STRUCT_ARG_STK
-#ifdef DEBUG
- GenTreePtr gtCall; // the call node to which this argument belongs
-#endif
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
// Instruction selection: during codegen time, what code sequence we will be using
// to encode this operation.
// TODO-Throughput: The following information should be obtained from the child
@@ -4751,7 +4792,12 @@ struct GenTreePutArgStk : public GenTreeUnOp
unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
unsigned gtNumberReferenceSlots; // Number of reference slots.
BYTE* gtGcPtrs; // gcPointers
-#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+#if defined(DEBUG) || defined(UNIX_X86_ABI)
+ GenTreeCall* gtCall; // the call node to which this argument belongs
+#endif
#if DEBUGGABLE_GENTREE
GenTreePutArgStk() : GenTreeUnOp()
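One detail of the GenTreePutArgStk cleanup above worth calling out: the raw field is now wrapped by a putInIncomingArgArea() accessor that exists in both the FEATURE_FASTTAILCALL and non-FEATURE_FASTTAILCALL builds, so call sites no longer need their own #ifdefs. A sketch of that general pattern, using a made-up FEATURE_WIDGET macro rather than the real one:

#include <cstdio>

// FEATURE_WIDGET is a made-up macro standing in for FEATURE_FASTTAILCALL.
// #define FEATURE_WIDGET 1

struct Node
{
#ifdef FEATURE_WIDGET
    bool m_widgetFlag = false; // only stored when the feature is compiled in

    bool widgetFlag() const
    {
        return m_widgetFlag;
    }
#else
    // When the feature is compiled out, the accessor still exists and simply
    // reports the only possible answer, so callers stay #ifdef-free.
    bool widgetFlag() const
    {
        return false;
    }
#endif
};

int main()
{
    Node n;
    // This call looks the same whether or not FEATURE_WIDGET is defined.
    printf("%d\n", n.widgetFlag());
    return 0;
}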
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 2d9255b6ce..826eaf1207 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -46,7 +46,7 @@ GTNODE(CNS_STR , "sconst" ,GenTreeStrCon ,0,GTK_LEAF|GTK_CON
//-----------------------------------------------------------------------------
GTNODE(NOT , "~" ,GenTreeOp ,0,GTK_UNOP)
-GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP)
+GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP|GTK_NOCONTAIN)
GTNODE(NEG , "unary -" ,GenTreeOp ,0,GTK_UNOP)
GTNODE(COPY , "copy" ,GenTreeCopyOrReload,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies
// code generation constraints. The child is the actual lclVar node.
@@ -65,8 +65,8 @@ GTNODE(CMPXCHG , "cmpxchg" ,GenTreeCmpXchg ,0,GTK_SPECIAL)
GTNODE(MEMORYBARRIER , "memoryBarrier",GenTree ,0,GTK_LEAF|GTK_NOVALUE)
GTNODE(CAST , "cast" ,GenTreeCast ,0,GTK_UNOP|GTK_EXOP) // conversion to another type
-GTNODE(CKFINITE , "ckfinite" ,GenTreeOp ,0,GTK_UNOP) // Check for NaN
-GTNODE(LCLHEAP , "lclHeap" ,GenTreeOp ,0,GTK_UNOP) // alloca()
+GTNODE(CKFINITE , "ckfinite" ,GenTreeOp ,0,GTK_UNOP|GTK_NOCONTAIN) // Check for NaN
+GTNODE(LCLHEAP , "lclHeap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOCONTAIN) // alloca()
GTNODE(JMP , "jump" ,GenTreeVal ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function
GTNODE(ADDR , "addr" ,GenTreeOp ,0,GTK_UNOP) // address of
@@ -226,7 +226,7 @@ GTNODE(FIELD , "field" ,GenTreeField ,0,GTK_SPECIAL)
GTNODE(ARR_ELEM , "arrMD&" ,GenTreeArrElem ,0,GTK_SPECIAL) // Multi-dimensional array-element address
GTNODE(ARR_INDEX , "arrMDIdx" ,GenTreeArrIndex ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element
GTNODE(ARR_OFFSET , "arrMDOffs" ,GenTreeArrOffs ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element
-GTNODE(CALL , "call()" ,GenTreeCall ,0,GTK_SPECIAL)
+GTNODE(CALL , "call()" ,GenTreeCall ,0,GTK_SPECIAL|GTK_NOCONTAIN)
//-----------------------------------------------------------------------------
// Statement operator nodes:
@@ -261,7 +261,7 @@ GTNODE(PHI_ARG , "phiArg" ,GenTreePhiArg ,0,GTK_LEAF|GTK_LOC
//-----------------------------------------------------------------------------
#ifndef LEGACY_BACKEND
-GTNODE(JMPTABLE , "jumpTable" ,GenTreeJumpTable ,0, GTK_LEAF) // Generates the jump table for switches
+GTNODE(JMPTABLE , "jumpTable" ,GenTreeJumpTable ,0, GTK_LEAF|GTK_NOCONTAIN) // Generates the jump table for switches
#endif
GTNODE(SWITCH_TABLE , "tableSwitch" ,GenTreeOp ,0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index b1e0f487ef..54427ba4dd 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -350,6 +350,12 @@ StackEntry& Compiler::impStackTop(unsigned n)
return verCurrentState.esStack[verCurrentState.esStackDepth - n - 1];
}
+
+unsigned Compiler::impStackHeight()
+{
+ return verCurrentState.esStackDepth;
+}
+
/*****************************************************************************
* Some of the trees are spilled specially. While unspilling them, or
* making a copy, these need to be handled specially. The function
@@ -1232,13 +1238,13 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
}
else if (src->gtOper == GT_RET_EXPR)
{
- GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
+ GenTreeCall* call = src->gtRetExpr.gtInlineCandidate->AsCall();
noway_assert(call->gtOper == GT_CALL);
- if (call->AsCall()->HasRetBufArg())
+ if (call->HasRetBufArg())
{
// insert the return value buffer into the argument list as first byref parameter
- call->gtCall.gtCallArgs = gtNewListNode(destAddr, call->gtCall.gtCallArgs);
+ call->gtCallArgs = gtNewListNode(destAddr, call->gtCallArgs);
// now returns void, not a struct
src->gtType = TYP_VOID;
@@ -1252,7 +1258,7 @@ GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
{
// Case of inline method returning a struct in one or more registers.
//
- var_types returnType = (var_types)call->gtCall.gtReturnType;
+ var_types returnType = (var_types)call->gtReturnType;
// We won't need a return buffer
asgType = returnType;
@@ -1842,7 +1848,7 @@ GenTreePtr Compiler::impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup,
return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, nullptr, compileTimeHandle);
}
-GenTreePtr Compiler::impReadyToRunHelperToTree(
+GenTreeCall* Compiler::impReadyToRunHelperToTree(
CORINFO_RESOLVED_TOKEN* pResolvedToken,
CorInfoHelpFunc helper,
var_types type,
@@ -1850,18 +1856,14 @@ GenTreePtr Compiler::impReadyToRunHelperToTree(
CORINFO_LOOKUP_KIND* pGenericLookupKind /* =NULL. Only used with generics */)
{
CORINFO_CONST_LOOKUP lookup;
-#if COR_JIT_EE_VERSION > 460
if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup))
{
return nullptr;
}
-#else
- info.compCompHnd->getReadyToRunHelper(pResolvedToken, helper, &lookup);
-#endif
- GenTreePtr op1 = gtNewHelperCallNode(helper, type, GTF_EXCEPT, args);
+ GenTreeCall* op1 = gtNewHelperCallNode(helper, type, GTF_EXCEPT, args);
- op1->gtCall.setEntryPoint(lookup);
+ op1->setEntryPoint(lookup);
return op1;
}
@@ -1879,9 +1881,7 @@ GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CO
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
- op1->gtFptrVal.gtEntryPoint = pCallInfo->codePointerLookup.constLookup;
- op1->gtFptrVal.gtLdftnResolvedToken = new (this, CMK_Unknown) CORINFO_RESOLVED_TOKEN;
- *op1->gtFptrVal.gtLdftnResolvedToken = *pResolvedToken;
+ op1->gtFptrVal.gtEntryPoint = pCallInfo->codePointerLookup.constLookup;
}
else
{
@@ -1929,7 +1929,7 @@ GenTreePtr Compiler::getRuntimeContextTree(CORINFO_RUNTIME_LOOKUP_KIND kind)
// Collectible types requires that for shared generic code, if we use the generic context parameter
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
// context parameter is this that we don't need the eager reporting logic.)
- lvaGenericsContextUsed = true;
+ lvaGenericsContextUseCount++;
if (kind == CORINFO_LOOKUP_THISOBJ)
{
@@ -2179,9 +2179,12 @@ bool Compiler::impSpillStackEntry(unsigned level,
}
}
+ bool isNewTemp = false;
+
if (tnum == BAD_VAR_NUM)
{
- tnum = lvaGrabTemp(true DEBUGARG(reason));
+ tnum = lvaGrabTemp(true DEBUGARG(reason));
+ isNewTemp = true;
}
else if (tiVerificationNeeded && lvaTable[tnum].TypeGet() != TYP_UNDEF)
{
@@ -2211,6 +2214,13 @@ bool Compiler::impSpillStackEntry(unsigned level,
/* Assign the spilled entry to the temp */
impAssignTempGen(tnum, tree, verCurrentState.esStack[level].seTypeInfo.GetClassHandle(), level);
+ // If temp is newly introduced and a ref type, grab what type info we can.
+ if (isNewTemp && (lvaTable[tnum].lvType == TYP_REF))
+ {
+ CORINFO_CLASS_HANDLE stkHnd = verCurrentState.esStack[level].seTypeInfo.GetClassHandle();
+ lvaSetClass(tnum, tree, stkHnd);
+ }
+
// The tree type may be modified by impAssignTempGen, so use the type of the lclVar.
var_types type = genActualType(lvaTable[tnum].TypeGet());
GenTreePtr temp = gtNewLclvNode(tnum, type);
@@ -2584,6 +2594,21 @@ inline IL_OFFSETX Compiler::impCurILOffset(IL_OFFSET offs, bool callInstruction)
}
}
+//------------------------------------------------------------------------
+// impCanSpillNow: check whether it is possible to spill all values on the eval stack to local variables.
+//
+// Arguments:
+// prevOpcode - last importer opcode
+//
+// Return Value:
+// true if it is legal to spill now; false if this could be part of a sequence that we do not want to break up.
+bool Compiler::impCanSpillNow(OPCODE prevOpcode)
+{
+ // Don't spill after ldtoken, because it could be part of an InitializeArray sequence.
+ // Breaking that sequence up would prevent impInitializeArrayIntrinsic from succeeding.
+ return prevOpcode != CEE_LDTOKEN;
+}
+
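impCanSpillNow feeds the periodic "tree getting too deep, spill the stack" check further down in impImportBlockCode (the MAX_TREE_SIZE hunk); the rule is simply never to flush between two instructions that the importer must later see as an intact pattern. A schematic loop showing that guard, with an invented mini opcode set standing in for the CEE_* opcodes:

#include <vector>
#include <cstdio>

// Invented mini-opcode set; OP_LDTOKEN stands in for CEE_LDTOKEN.
enum MiniOp
{
    OP_NOP,
    OP_LDTOKEN,
    OP_CALL
};

// Mirror of the guard: spilling right after ldtoken would split the
// ldtoken + call InitializeArray pattern, so refuse to spill there.
static bool canSpillNow(MiniOp prevOp)
{
    return prevOp != OP_LDTOKEN;
}

int main()
{
    const std::vector<MiniOp> stream = {OP_NOP, OP_LDTOKEN, OP_CALL, OP_NOP};

    MiniOp prevOp = OP_NOP;
    for (MiniOp op : stream)
    {
        bool wantSpill = true; // pretend the eval stack has grown too deep
        if (wantSpill && canSpillNow(prevOp))
        {
            printf("spill before opcode %d\n", op);
        }
        prevOp = op;
    }
    return 0;
}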
/*****************************************************************************
*
* Remember the instr offset for the statements
@@ -2997,14 +3022,12 @@ GenTreePtr Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
#endif
)
{
-#if COR_JIT_EE_VERSION > 460
if (newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEW_MDARR_NONVARARG))
{
return nullptr;
}
isMDArray = true;
-#endif
}
CORINFO_CLASS_HANDLE arrayClsHnd = (CORINFO_CLASS_HANDLE)newArrayCall->gtCall.compileTimeHelperArgumentHandle;
@@ -3278,13 +3301,9 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
bool tailCall,
CorInfoIntrinsics* pIntrinsicID)
{
- bool mustExpand = false;
-#if COR_JIT_EE_VERSION > 460
+ bool mustExpand = false;
CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
-#else
- CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
-#endif
- *pIntrinsicID = intrinsicID;
+ *pIntrinsicID = intrinsicID;
#ifndef _TARGET_ARM_
genTreeOps interlockedOperator;
@@ -3557,7 +3576,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
case CORINFO_INTRINSIC_GetTypeFromHandle:
op1 = impStackTop(0).val;
if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
- gtIsTypeHandleToRuntimeTypeHelper(op1))
+ gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()))
{
op1 = impPopStack().val;
// Change call to return RuntimeType directly.
@@ -3570,7 +3589,7 @@ GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
case CORINFO_INTRINSIC_RTH_GetValueInternal:
op1 = impStackTop(0).val;
if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
- gtIsTypeHandleToRuntimeTypeHelper(op1))
+ gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()))
{
// Old tree
// Helper-RuntimeTypeHandle -> TreeToGetNativeTypeHandle
@@ -4989,6 +5008,23 @@ GenTreePtr Compiler::impImportLdvirtftn(GenTreePtr thisPtr,
NO_WAY("Virtual call to a function added via EnC is not supported");
}
+ // CoreRT generic virtual method
+ if (((pCallInfo->sig.callConv & CORINFO_CALLCONV_GENERIC) != 0) && IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ GenTreePtr runtimeMethodHandle = nullptr;
+ if (pCallInfo->exactContextNeedsRuntimeLookup)
+ {
+ runtimeMethodHandle =
+ impRuntimeLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, pCallInfo->hMethod);
+ }
+ else
+ {
+ runtimeMethodHandle = gtNewIconEmbMethHndNode(pResolvedToken->hMethod);
+ }
+ return gtNewHelperCallNode(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, TYP_I_IMPL, GTF_EXCEPT,
+ gtNewArgList(thisPtr, runtimeMethodHandle));
+ }
+
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
@@ -5238,7 +5274,6 @@ void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI
//
CLANG_FORMAT_COMMENT_ANCHOR;
-#if COR_JIT_EE_VERSION > 460
if (!opts.IsReadyToRun() || IsTargetAbi(CORINFO_CORERT_ABI))
{
LclVarDsc* newObjArrayArgsVar;
@@ -5298,7 +5333,6 @@ void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORI
node = gtNewHelperCallNode(CORINFO_HELP_NEW_MDARR_NONVARARG, TYP_REF, 0, args);
}
else
-#endif
{
//
// The varargs helper needs the type and method handles as last
@@ -5522,14 +5556,14 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
// If GTF_CALL_UNMANAGED is set, increments info.compCallUnmanaged
void Compiler::impCheckForPInvokeCall(
- GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block)
+ GenTreeCall* call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block)
{
CorInfoUnmanagedCallConv unmanagedCallConv;
// If VM flagged it as Pinvoke, flag the call node accordingly
if ((mflags & CORINFO_FLG_PINVOKE) != 0)
{
- call->gtCall.gtCallMoreFlags |= GTF_CALL_M_PINVOKE;
+ call->gtCallMoreFlags |= GTF_CALL_M_PINVOKE;
}
if (methHnd)
@@ -5554,7 +5588,7 @@ void Compiler::impCheckForPInvokeCall(
static_assert_no_msg((unsigned)CORINFO_CALLCONV_THISCALL == (unsigned)CORINFO_UNMANAGED_CALLCONV_THISCALL);
unmanagedCallConv = CorInfoUnmanagedCallConv(callConv);
- assert(!call->gtCall.gtCallCookie);
+ assert(!call->gtCallCookie);
}
if (unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_C && unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_STDCALL &&
@@ -5614,11 +5648,11 @@ void Compiler::impCheckForPInvokeCall(
if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_THISCALL)
{
- call->gtCall.gtCallMoreFlags |= GTF_CALL_M_UNMGD_THISCALL;
+ call->gtCallMoreFlags |= GTF_CALL_M_UNMGD_THISCALL;
}
}
-GenTreePtr Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset)
+GenTreeCall* Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset)
{
var_types callRetTyp = JITtype2varType(sig->retType);
@@ -5637,7 +5671,11 @@ GenTreePtr Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilO
/* Get the function pointer */
GenTreePtr fptr = impPopStack().val;
- assert(genActualType(fptr->gtType) == TYP_I_IMPL);
+
+ // The function pointer is typically sized to match the target pointer size.
+ // However, the stubgen IL optimization can change LDC.I8 to LDC.I4.
+ // See ILCodeStream::LowerOpcode.
+ assert(genActualType(fptr->gtType) == TYP_I_IMPL || genActualType(fptr->gtType) == TYP_INT);
#ifdef DEBUG
// This temporary must never be converted to a double in stress mode,
@@ -5652,7 +5690,7 @@ GenTreePtr Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilO
/* Create the call node */
- GenTreePtr call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
+ GenTreeCall* call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
@@ -5922,7 +5960,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
}
break;
}
-#if COR_JIT_EE_VERSION > 460
+
case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
{
#ifdef FEATURE_READYTORUN_COMPILER
@@ -5951,7 +5989,7 @@ GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolve
#endif // FEATURE_READYTORUN_COMPILER
}
break;
-#endif // COR_JIT_EE_VERSION > 460
+
default:
{
if (!(access & CORINFO_ACCESS_ADDRESS))
@@ -6111,25 +6149,6 @@ void Compiler::impInsertHelperCall(CORINFO_HELPER_DESC* helperInfo)
impAppendTree(callout, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
}
-void Compiler::impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
- CORINFO_METHOD_HANDLE calleeMethodHnd,
- CORINFO_CLASS_HANDLE delegateTypeHnd)
-{
-#ifdef FEATURE_CORECLR
- if (!info.compCompHnd->isDelegateCreationAllowed(delegateTypeHnd, calleeMethodHnd))
- {
- // Call the JIT_DelegateSecurityCheck helper before calling the actual function.
- // This helper throws an exception if the CLR host disallows the call.
-
- GenTreePtr helper = gtNewHelperCallNode(CORINFO_HELP_DELEGATE_SECURITY_CHECK, TYP_VOID, GTF_EXCEPT,
- gtNewArgList(gtNewIconEmbClsHndNode(delegateTypeHnd),
- gtNewIconEmbMethHndNode(calleeMethodHnd)));
- // Append the callout statement
- impAppendTree(helper, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
- }
-#endif // FEATURE_CORECLR
-}
-
// Checks whether the return types of caller and callee are compatible
// so that callee can be tail called. Note that here we don't check
// compatibility in IL Verifier sense, but on the lines of return type
@@ -6376,12 +6395,14 @@ var_types Compiler::impImportCall(OPCODE opcode,
GenTreeArgList* args = nullptr;
CORINFO_THIS_TRANSFORM constraintCallThisTransform = CORINFO_NO_THIS_TRANSFORM;
CORINFO_CONTEXT_HANDLE exactContextHnd = nullptr;
- BOOL exactContextNeedsRuntimeLookup = FALSE;
+ bool exactContextNeedsRuntimeLookup = false;
bool canTailCall = true;
const char* szCanTailCallFailReason = nullptr;
int tailCall = prefixFlags & PREFIX_TAILCALL;
bool readonlyCall = (prefixFlags & PREFIX_READONLY) != 0;
+ CORINFO_RESOLVED_TOKEN* ldftnToken = nullptr;
+
// Synchronized methods need to call CORINFO_HELP_MON_EXIT at the end. We could
// do that before tailcalls, but that is probably not the intended
// semantic. So just disallow tailcalls from synchronized methods.
@@ -6432,7 +6453,6 @@ var_types Compiler::impImportCall(OPCODE opcode,
eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &calliSig);
callRetTyp = JITtype2varType(calliSig.retType);
- clsHnd = calliSig.retTypeClass;
call = impImportIndirectCall(&calliSig, ilOffset);
@@ -6464,11 +6484,13 @@ var_types Compiler::impImportCall(OPCODE opcode,
if (IsTargetAbi(CORINFO_CORERT_ABI))
{
- bool managedCall = (calliSig.callConv & GTF_CALL_UNMANAGED) == 0;
+ bool managedCall = (((calliSig.callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_STDCALL) &&
+ ((calliSig.callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_C) &&
+ ((calliSig.callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_THISCALL) &&
+ ((calliSig.callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_FASTCALL));
if (managedCall)
{
- call->AsCall()->SetFatPointerCandidate();
- setMethodHasFatPointer();
+ addFatPointerCandidate(call->AsCall());
}
}
}
@@ -6519,7 +6541,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
if (mflags & CORINFO_FLG_DONT_INLINE_CALLER)
{
- compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NOINLINE_CALLEE);
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_STACK_CRAWL_MARK);
return callRetTyp;
}
@@ -6632,10 +6654,9 @@ var_types Compiler::impImportCall(OPCODE opcode,
// Work out what sort of call we're making.
// Dispense with virtual calls implemented via LDVIRTFTN immediately.
- constraintCallThisTransform = callInfo->thisTransform;
-
+ constraintCallThisTransform = callInfo->thisTransform;
exactContextHnd = callInfo->contextHandle;
- exactContextNeedsRuntimeLookup = callInfo->exactContextNeedsRuntimeLookup;
+ exactContextNeedsRuntimeLookup = callInfo->exactContextNeedsRuntimeLookup == TRUE;
// Recursive call is treated as a loop to the beginning of the method.
if (methHnd == info.compMethodHnd)
@@ -6773,6 +6794,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
nullptr DEBUGARG("LDVIRTFTN this pointer"));
GenTreePtr fptr = impImportLdvirtftn(thisPtr, pResolvedToken, callInfo);
+
if (compDonotInline())
{
return callRetTyp;
@@ -6792,6 +6814,11 @@ var_types Compiler::impImportCall(OPCODE opcode,
call->gtCall.gtCallObjp = thisPtrCopy;
call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+ if (((sig->callConv & CORINFO_CALLCONV_GENERIC) != 0) && IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ // CoreRT generic virtual method: need to handle potential fat function pointers
+ addFatPointerCandidate(call->AsCall());
+ }
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
@@ -6946,6 +6973,14 @@ var_types Compiler::impImportCall(OPCODE opcode,
}
#endif // !FEATURE_VARARG
+#ifdef UNIX_X86_ABI
+ if (call->gtCall.callSig == nullptr)
+ {
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = *sig;
+ }
+#endif // UNIX_X86_ABI
+
if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
(sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
{
@@ -7054,7 +7089,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
{
// New lexical block here to avoid compilation errors because of GOTOs.
BasicBlock* block = compIsForInlining() ? impInlineInfo->iciBlock : compCurBB;
- impCheckForPInvokeCall(call, methHnd, sig, mflags, block);
+ impCheckForPInvokeCall(call->AsCall(), methHnd, sig, mflags, block);
}
if (call->gtFlags & GTF_CALL_UNMANAGED)
@@ -7279,6 +7314,21 @@ var_types Compiler::impImportCall(OPCODE opcode,
exactContextHnd = nullptr;
}
+ if ((opcode == CEE_NEWOBJ) && ((clsFlags & CORINFO_FLG_DELEGATE) != 0))
+ {
+ // Only verifiable cases are supported.
+ // dup; ldvirtftn; newobj; or ldftn; newobj.
+ // An IL test could contain an unverifiable sequence; in that case the optimization should not be done.
+ if (impStackHeight() > 0)
+ {
+ typeInfo delegateTypeInfo = impStackTop().seTypeInfo;
+ if (delegateTypeInfo.IsToken())
+ {
+ ldftnToken = delegateTypeInfo.GetToken();
+ }
+ }
+ }
+
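The ldftnToken capture above leans on the verifiable delegate-construction shapes (ldftn; newobj, or dup; ldvirtftn; newobj), where the stack top still carries the resolved method token when the newobj is imported. The sketch below shows the same "peek the stack top and use a token only if one is really there" idea; the ResolvedToken/StackEntry types are invented stand-ins, not the importer's typeInfo machinery.

#include <vector>
#include <cstdio>

// Invented stand-ins; the real importer uses typeInfo/CORINFO_RESOLVED_TOKEN.
struct ResolvedToken
{
    unsigned token;
};

struct StackEntry
{
    ResolvedToken* ldftnToken; // non-null only when pushed by ldftn/ldvirtftn
};

// Peek the top of the eval stack and return its token, if any.
static ResolvedToken* peekLdftnToken(const std::vector<StackEntry>& stack)
{
    if (stack.empty())
    {
        return nullptr; // unverifiable shapes simply yield no token, no failure
    }
    return stack.back().ldftnToken;
}

int main()
{
    ResolvedToken tok = {0x06000123};
    std::vector<StackEntry> stack;
    stack.push_back({&tok}); // as if ldftn had just been imported

    ResolvedToken* found = peekLdftnToken(stack);
    printf("token: 0x%08X\n", (found != nullptr) ? found->token : 0u);
    return 0;
}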
//-------------------------------------------------------------------------
// The main group of arguments
@@ -7315,8 +7365,10 @@ var_types Compiler::impImportCall(OPCODE opcode,
if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
{
/* only true object pointers can be virtual */
-
assert(obj->gtType == TYP_REF);
+
+ // See if we can devirtualize.
+ impDevirtualizeCall(call->AsCall(), obj, callInfo, &exactContextHnd);
}
else
{
@@ -7357,7 +7409,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
{
// New inliner morph it in impImportCall.
// This will allow us to inline the call to the delegate constructor.
- call = fgOptimizeDelegateConstructor(call, &exactContextHnd);
+ call = fgOptimizeDelegateConstructor(call->AsCall(), &exactContextHnd, ldftnToken);
}
if (!bIntrinsicImported)
@@ -7371,7 +7423,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
#endif // defined(DEBUG) || defined(INLINE_DATA)
// Is it an inline candidate?
- impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo);
}
// append the call node.
@@ -7595,7 +7647,7 @@ DONE:
#endif // defined(DEBUG) || defined(INLINE_DATA)
// Is it an inline candidate?
- impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ impMarkInlineCandidate(call, exactContextHnd, exactContextNeedsRuntimeLookup, callInfo);
}
DONE_CALL:
@@ -7658,7 +7710,7 @@ DONE_CALL:
bool fatPointerCandidate = call->AsCall()->IsFatPointerCandidate();
if (varTypeIsStruct(callRetTyp))
{
- call = impFixupCallStructReturn(call, sig->retTypeClass);
+ call = impFixupCallStructReturn(call->AsCall(), sig->retTypeClass);
}
if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
@@ -7686,16 +7738,39 @@ DONE_CALL:
unsigned calliSlot = lvaGrabTemp(true DEBUGARG("calli"));
LclVarDsc* varDsc = &lvaTable[calliSlot];
varDsc->lvVerTypeInfo = tiRetVal;
- impAssignTempGen(calliSlot, call, clsHnd, (unsigned)CHECK_SPILL_NONE);
+ impAssignTempGen(calliSlot, call, tiRetVal.GetClassHandle(), (unsigned)CHECK_SPILL_NONE);
// impAssignTempGen can change src arg list and return type for call that returns struct.
var_types type = genActualType(lvaTable[calliSlot].TypeGet());
call = gtNewLclvNode(calliSlot, type);
}
}
+
// For non-candidates we must also spill, since we
// might have locals live on the eval stack that this
// call can modify.
- impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("non-inline candidate call"));
+ //
+ // Suppress this for certain well-known call targets
+ // that we know won't modify locals, e.g. calls that are
+ // recognized in gtCanOptimizeTypeEquality. Otherwise
+ // we may break key fragile pattern matches later on.
+ bool spillStack = true;
+ if (call->IsCall())
+ {
+ GenTreeCall* callNode = call->AsCall();
+ if ((callNode->gtCallType == CT_HELPER) && gtIsTypeHandleToRuntimeTypeHelper(callNode))
+ {
+ spillStack = false;
+ }
+ else if ((callNode->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
+ {
+ spillStack = false;
+ }
+ }
+
+ if (spillStack)
+ {
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("non-inline candidate call"));
+ }
}
}
@@ -7820,33 +7895,29 @@ var_types Compiler::impImportJitTestLabelMark(int numArgs)
// Return Value:
// Returns new GenTree node after fixing struct return of call node
//
-GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd)
+GenTreePtr Compiler::impFixupCallStructReturn(GenTreeCall* call, CORINFO_CLASS_HANDLE retClsHnd)
{
- assert(call->gtOper == GT_CALL);
-
if (!varTypeIsStruct(call))
{
return call;
}
- call->gtCall.gtRetClsHnd = retClsHnd;
-
- GenTreeCall* callNode = call->AsCall();
+ call->gtRetClsHnd = retClsHnd;
#if FEATURE_MULTIREG_RET
// Initialize Return type descriptor of call node
- ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
retTypeDesc->InitializeStructReturnType(this, retClsHnd);
#endif // FEATURE_MULTIREG_RET
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
// Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
- assert(!callNode->IsVarargs() && "varargs not allowed for System V OSs.");
+ assert(!call->IsVarargs() && "varargs not allowed for System V OSs.");
// The return type will remain as the incoming struct type unless normalized to a
// single eightbyte return type below.
- callNode->gtReturnType = call->gtType;
+ call->gtReturnType = call->gtType;
unsigned retRegCount = retTypeDesc->GetReturnRegCount();
if (retRegCount != 0)
@@ -7854,14 +7925,14 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
if (retRegCount == 1)
{
// struct returned in a single register
- callNode->gtReturnType = retTypeDesc->GetReturnRegType(0);
+ call->gtReturnType = retTypeDesc->GetReturnRegType(0);
}
else
{
// must be a struct returned in two registers
assert(retRegCount == 2);
- if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ if ((!call->CanTailCall()) && (!call->IsInlineCandidate()))
{
// Force a call returning multi-reg struct to be always of the IR form
// tmp = call
@@ -7876,7 +7947,7 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
else
{
// struct not returned in registers, i.e. returned via hidden retbuf arg.
- callNode->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ call->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
}
#else // not FEATURE_UNIX_AMD64_STRUCT_PASSING
@@ -7885,15 +7956,15 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
// There is no fixup necessary if the return type is a HFA struct.
// HFA structs are returned in registers for ARM32 and ARM64
//
- if (!call->gtCall.IsVarargs() && IsHfa(retClsHnd))
+ if (!call->IsVarargs() && IsHfa(retClsHnd))
{
- if (call->gtCall.CanTailCall())
+ if (call->CanTailCall())
{
if (info.compIsVarArgs)
{
// We cannot tail call because control needs to return to fixup the calling
// convention for result return.
- call->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
}
else
{
@@ -7926,12 +7997,12 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
if (howToReturnStruct == SPK_ByReference)
{
assert(returnType == TYP_UNKNOWN);
- call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ call->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
}
else
{
assert(returnType != TYP_UNKNOWN);
- call->gtCall.gtReturnType = returnType;
+ call->gtReturnType = returnType;
// ToDo: Refactor this common code sequence into its own method as it is used 4+ times
if ((returnType == TYP_LONG) && (compLongUsed == false))
@@ -7949,7 +8020,7 @@ GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HAN
if (retRegCount >= 2)
{
- if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ if ((!call->CanTailCall()) && (!call->IsInlineCandidate()))
{
// Force a call returning multi-reg struct to be always of the IR form
// tmp = call
@@ -9379,6 +9450,9 @@ GenTreePtr Compiler::impCastClassOrIsInstToTree(GenTreePtr op1,
// Make QMark node a top level node by spilling it.
unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark2"));
impAssignTempGen(tmp, qmarkNull, (unsigned)CHECK_SPILL_NONE);
+
+ // TODO: Is it possible op1 has a better type?
+ lvaSetClass(tmp, pResolvedToken->hClass);
return gtNewLclvNode(tmp, TYP_REF);
#endif
}
@@ -9458,7 +9532,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
int prefixFlags = 0;
bool explicitTailCall, constraintCall, readonlyCall;
- bool insertLdloc = false; // set by CEE_DUP and cleared by following store
typeInfo tiRetVal;
unsigned numArgs = info.compArgsCount;
@@ -9500,7 +9573,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
/* Has it been a while since we last saw a non-empty stack (which
guarantees that the tree depth isnt accumulating. */
- if ((opcodeOffs - lastSpillOffs) > 200)
+ if ((opcodeOffs - lastSpillOffs) > MAX_TREE_SIZE && impCanSpillNow(prevOpcode))
{
impSpillStackEnsure();
lastSpillOffs = opcodeOffs;
@@ -9637,6 +9710,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
GenTreeArgList* args = nullptr; // What good do these "DUMMY_INIT"s do?
GenTreePtr newObjThisPtr = DUMMY_INIT(NULL);
bool uns = DUMMY_INIT(false);
+ bool isLocal = false;
/* Get the next opcode and the size of its parameters */
@@ -9892,7 +9966,9 @@ void Compiler::impImportBlockCode(BasicBlock* block)
{
lclNum = lvaArg0Var;
}
- lvaTable[lclNum].lvArgWrite = 1;
+
+ // We should have seen this arg write in the prescan
+ assert(lvaTable[lclNum].lvHasILStoreOp);
if (tiVerificationNeeded)
{
@@ -9909,12 +9985,14 @@ void Compiler::impImportBlockCode(BasicBlock* block)
goto VAR_ST;
case CEE_STLOC:
- lclNum = getU2LittleEndian(codeAddr);
+ lclNum = getU2LittleEndian(codeAddr);
+ isLocal = true;
JITDUMP(" %u", lclNum);
goto LOC_ST;
case CEE_STLOC_S:
- lclNum = getU1LittleEndian(codeAddr);
+ lclNum = getU1LittleEndian(codeAddr);
+ isLocal = true;
JITDUMP(" %u", lclNum);
goto LOC_ST;
@@ -9922,7 +10000,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CEE_STLOC_1:
case CEE_STLOC_2:
case CEE_STLOC_3:
- lclNum = (opcode - CEE_STLOC_0);
+ isLocal = true;
+ lclNum = (opcode - CEE_STLOC_0);
assert(lclNum >= 0 && lclNum < 4);
LOC_ST:
@@ -10023,31 +10102,32 @@ void Compiler::impImportBlockCode(BasicBlock* block)
}
}
- /* Filter out simple assignments to itself */
-
- if (op1->gtOper == GT_LCL_VAR && lclNum == op1->gtLclVarCommon.gtLclNum)
+ // If this is a local and the local is a ref type, see
+ // if we can improve type information based on the
+ // value being assigned.
+ if (isLocal && (lclTyp == TYP_REF))
{
- if (insertLdloc)
- {
- // This is a sequence of (ldloc, dup, stloc). Can simplify
- // to (ldloc, stloc). Goto LDVAR to reconstruct the ldloc node.
- CLANG_FORMAT_COMMENT_ANCHOR;
+ // We should have seen a stloc in our IL prescan.
+ assert(lvaTable[lclNum].lvHasILStoreOp);
-#ifdef DEBUG
- if (tiVerificationNeeded)
- {
- assert(
- typeInfo::AreEquivalent(tiRetVal, NormaliseForStack(lvaTable[lclNum].lvVerTypeInfo)));
- }
-#endif
+ const bool isSingleILStoreLocal =
+ !lvaTable[lclNum].lvHasMultipleILStoreOp && !lvaTable[lclNum].lvHasLdAddrOp;
- op1 = nullptr;
- insertLdloc = false;
+ // Conservative check that there is just one
+ // definition that reaches this store.
+ const bool hasSingleReachingDef = (block->bbStackDepthOnEntry() == 0);
- impLoadVar(lclNum, opcodeOffs + sz + 1);
- break;
+ if (isSingleILStoreLocal && hasSingleReachingDef)
+ {
+ lvaUpdateClass(lclNum, op1, clsHnd);
}
- else if (opts.compDbgCode)
+ }
+
+ /* Filter out simple assignments to itself */
+
+ if (op1->gtOper == GT_LCL_VAR && lclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ if (opts.compDbgCode)
{
op1 = gtNewNothingNode();
goto SPILL_APPEND;
@@ -10104,26 +10184,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
op1 = gtNewAssignNode(op2, op1);
}
- /* If insertLdloc is true, then we need to insert a ldloc following the
- stloc. This is done when converting a (dup, stloc) sequence into
- a (stloc, ldloc) sequence. */
-
- if (insertLdloc)
- {
- // From SPILL_APPEND
- impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
-
-#ifdef DEBUG
- // From DONE_APPEND
- impNoteLastILoffs();
-#endif
- op1 = nullptr;
- insertLdloc = false;
-
- impLoadVar(lclNum, opcodeOffs + sz + 1, tiRetVal);
- break;
- }
-
goto SPILL_APPEND;
case CEE_LDLOCA:
@@ -11566,22 +11626,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
op1 = impPopStack().val;
assertImp(genActualTypeIsIntOrI(op1->TypeGet()));
-#ifdef _TARGET_64BIT_
- // Widen 'op1' on 64-bit targets
- if (op1->TypeGet() != TYP_I_IMPL)
- {
- if (op1->OperGet() == GT_CNS_INT)
- {
- op1->gtType = TYP_I_IMPL;
- }
- else
- {
- op1 = gtNewCastNode(TYP_I_IMPL, op1, TYP_I_IMPL);
- }
- }
-#endif // _TARGET_64BIT_
- assert(genActualType(op1->TypeGet()) == TYP_I_IMPL);
-
/* We can create a switch node */
op1 = gtNewOperNode(GT_SWITCH, TYP_VOID, op1);
@@ -11941,48 +11985,30 @@ void Compiler::impImportBlockCode(BasicBlock* block)
impStackTop(0);
}
- // Convert a (dup, stloc) sequence into a (stloc, ldloc) sequence in the following cases:
- // - If this is non-debug code - so that CSE will recognize the two as equal.
- // This helps eliminate a redundant bounds check in cases such as:
- // ariba[i+3] += some_value;
- // - If the top of the stack is a non-leaf that may be expensive to clone.
+ // If the expression to dup is simple, just clone it.
+ // Otherwise spill it to a temp, and reload the temp
+ // twice.
+ op1 = impPopStack(tiRetVal);
- if (codeAddr < codeEndp)
+ if (!opts.compDbgCode && !op1->IsIntegralConst(0) && !op1->IsFPZero() && !op1->IsLocal())
{
- OPCODE nextOpcode = (OPCODE)getU1LittleEndian(codeAddr);
- if (impIsAnySTLOC(nextOpcode))
+ const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("dup spill"));
+ impAssignTempGen(tmpNum, op1, tiRetVal.GetClassHandle(), (unsigned)CHECK_SPILL_ALL);
+ var_types type = genActualType(lvaTable[tmpNum].TypeGet());
+ op1 = gtNewLclvNode(tmpNum, type);
+
+ // Propagate type info to the temp
+ if (type == TYP_REF)
{
- if (!opts.compDbgCode)
- {
- insertLdloc = true;
- break;
- }
- GenTree* stackTop = impStackTop().val;
- if (!stackTop->IsIntegralConst(0) && !stackTop->IsFPZero() && !stackTop->IsLocal())
- {
- insertLdloc = true;
- break;
- }
+ lvaSetClass(tmpNum, op1, tiRetVal.GetClassHandle());
}
}
- /* Pull the top value from the stack */
- op1 = impPopStack(tiRetVal);
-
- /* Clone the value */
op1 = impCloneExpr(op1, &op2, tiRetVal.GetClassHandle(), (unsigned)CHECK_SPILL_ALL,
nullptr DEBUGARG("DUP instruction"));
- /* Either the tree started with no global effects, or impCloneExpr
- evaluated the tree to a temp and returned two copies of that
- temp. Either way, neither op1 nor op2 should have side effects.
- */
assert(!(op1->gtFlags & GTF_GLOB_EFFECT) && !(op2->gtFlags & GTF_GLOB_EFFECT));
-
- /* Push the tree/temp back on the stack */
impPushOnStack(op1, tiRetVal);
-
- /* Push the copy on the stack */
impPushOnStack(op2, tiRetVal);
break;
@@ -12290,7 +12316,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
}
- impPushOnStack(op1, typeInfo(resolvedToken.hMethod));
+ CORINFO_RESOLVED_TOKEN* heapToken = impAllocateToken(resolvedToken);
+ impPushOnStack(op1, typeInfo(heapToken));
break;
}
@@ -12395,7 +12422,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
}
- impPushOnStack(fptr, typeInfo(resolvedToken.hMethod));
+ CORINFO_RESOLVED_TOKEN* heapToken = impAllocateToken(resolvedToken);
+ assert(heapToken->tokenType == CORINFO_TOKENKIND_Method);
+ heapToken->tokenType = CORINFO_TOKENKIND_Ldvirtftn;
+ impPushOnStack(fptr, typeInfo(heapToken));
break;
}
@@ -12465,11 +12495,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
/* NEWOBJ does not respond to CONSTRAINED */
prefixFlags &= ~PREFIX_CONSTRAINED;
-#if COR_JIT_EE_VERSION > 460
_impResolveToken(CORINFO_TOKENKIND_NewObj);
-#else
- _impResolveToken(CORINFO_TOKENKIND_Method);
-#endif
eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef*/,
addVerifyFlag(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_ALLOWINSTPARAM)),
@@ -12673,6 +12699,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
// without exhaustive walk over all expressions.
impAssignTempGen(lclNum, op1, (unsigned)CHECK_SPILL_NONE);
+ lvaSetClass(lclNum, resolvedToken.hClass, true /* is Exact */);
newObjThisPtr = gtNewLclvNode(lclNum, TYP_REF);
}
@@ -12770,11 +12797,31 @@ void Compiler::impImportBlockCode(BasicBlock* block)
prefixFlags |= PREFIX_TAILCALL_EXPLICIT;
}
}
+ }
- // Note that when running under tail call stress, a call will be marked as explicit tail prefixed
- // hence will not be considered for implicit tail calling.
- bool isRecursive = (callInfo.hMethod == info.compMethodHnd);
- if (impIsImplicitTailCallCandidate(opcode, codeAddr + sz, codeEndp, prefixFlags, isRecursive))
+ // This is split up to avoid goto flow warnings.
+ bool isRecursive;
+ isRecursive = !compIsForInlining() && (callInfo.hMethod == info.compMethodHnd);
+
+ // Note that when running under tail call stress, a call will be marked as explicit tail prefixed
+ // hence will not be considered for implicit tail calling.
+ if (impIsImplicitTailCallCandidate(opcode, codeAddr + sz, codeEndp, prefixFlags, isRecursive))
+ {
+ if (compIsForInlining())
+ {
+#if FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // Are we inlining at an implicit tail call site? If so then we can flag
+ // implicit tail call sites in the inline body. These call sites
+ // often end up in non BBJ_RETURN blocks, so only flag them when
+ // we're able to handle shared returns.
+ if (impInlineInfo->iciCall->IsImplicitTailCall())
+ {
+ JITDUMP(" (Inline Implicit Tail call: prefixFlags |= PREFIX_TAILCALL_IMPLICIT)");
+ prefixFlags |= PREFIX_TAILCALL_IMPLICIT;
+ }
+#endif // FEATURE_TAILCALL_OPT_SHARED_RETURN
+ }
+ else
{
JITDUMP(" (Implicit Tail call: prefixFlags |= PREFIX_TAILCALL_IMPLICIT)");
prefixFlags |= PREFIX_TAILCALL_IMPLICIT;
@@ -12793,7 +12840,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
#if 0 // DevDiv 410397 - This breaks too many obfuscated apps to do this in an in-place release
-
+
// DevDiv 291703 - we need to check for accessibility between the caller of InitializeArray
// and the field it is reading, thus it is now unverifiable to not immediately precede with
// ldtoken <field token>, and we now check accessibility
@@ -12838,14 +12885,6 @@ void Compiler::impImportBlockCode(BasicBlock* block)
assert(verCheckDelegateCreation(delegateCreateStart, codeAddr - 1, delegateMethodRef));
}
#endif
-
-#ifdef FEATURE_CORECLR
- // In coreclr the delegate transparency rule needs to be enforced even if verification is disabled
- typeInfo tiActualFtn = impStackTop(0).seTypeInfo;
- CORINFO_METHOD_HANDLE delegateMethodHandle = tiActualFtn.GetMethod2();
-
- impInsertCalloutForDelegate(info.compMethodHnd, delegateMethodHandle, resolvedToken.hClass);
-#endif // FEATURE_CORECLR
}
callTyp = impImportCall(opcode, &resolvedToken, constraintCall ? &constrainedResolvedToken : nullptr,
@@ -12932,9 +12971,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
-#endif
/* We may be able to inline the field accessors in specific instantiations of generic
* methods */
compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDFLD_NEEDS_HELPER);
@@ -13165,9 +13202,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CORINFO_FIELD_STATIC_RVA_ADDRESS:
case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
-#endif
op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
lclTyp);
break;
@@ -13191,6 +13226,18 @@ void Compiler::impImportBlockCode(BasicBlock* block)
}
break;
+ case CORINFO_FIELD_INTRINSIC_ISLITTLEENDIAN:
+ {
+ assert(aflags & CORINFO_ACCESS_GET);
+#if BIGENDIAN
+ op1 = gtNewIconNode(0, lclTyp);
+#else
+ op1 = gtNewIconNode(1, lclTyp);
+#endif
+ goto FIELD_DONE;
+ }
+ break;
+
default:
assert(!"Unexpected fieldAccessor");
}
@@ -13311,10 +13358,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
return;
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
-#endif
-
/* We may be able to inline the field accessors in specific instantiations of generic
* methods */
compInlineResult->NoteFatal(InlineObservation::CALLSITE_STFLD_NEEDS_HELPER);
@@ -13433,9 +13477,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
case CORINFO_FIELD_STATIC_RVA_ADDRESS:
case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_FIELD_STATIC_READYTORUN_HELPER:
-#endif
op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
lclTyp);
break;
@@ -13611,9 +13653,10 @@ void Compiler::impImportBlockCode(BasicBlock* block)
Verify(elemTypeHnd == nullptr ||
!(info.compCompHnd->getClassAttribs(elemTypeHnd) & CORINFO_FLG_CONTAINS_STACK_PTR),
"array of byref-like type");
- tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
}
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+
accessAllowedResult =
info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
@@ -13748,7 +13791,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
- GenTreePtr opLookup =
+ GenTreeCall* opLookup =
impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
gtNewArgList(op1));
usingReadyToRunHelper = (opLookup != nullptr);
@@ -14279,8 +14322,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
#ifdef FEATURE_READYTORUN_COMPILER
if (opts.IsReadyToRun())
{
- GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
- TYP_REF, gtNewArgList(op1));
+ GenTreeCall* opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
+ TYP_REF, gtNewArgList(op1));
usingReadyToRunHelper = (opLookup != nullptr);
op1 = (usingReadyToRunHelper ? opLookup : op1);
@@ -14782,11 +14825,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
prevOpcode = opcode;
prefixFlags = 0;
- assert(!insertLdloc || opcode == CEE_DUP);
}
- assert(!insertLdloc);
-
return;
#undef _impResolveToken
}
@@ -14994,6 +15034,16 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
Verify(verCurrentState.esStackDepth == expectedStack, "stack non-empty on return");
}
+#ifdef DEBUG
+ // If we are importing an inlinee and have GC ref locals we always
+ // need to have a spill temp for the return value. This temp
+ // should have been set up in advance, over in fgFindBasicBlocks.
+ if (compIsForInlining() && impInlineInfo->HasGcRefLocals() && (info.compRetType != TYP_VOID))
+ {
+ assert(lvaInlineeReturnSpillTemp != BAD_VAR_NUM);
+ }
+#endif // DEBUG
+
GenTree* op2 = nullptr;
GenTree* op1 = nullptr;
CORINFO_CLASS_HANDLE retClsHnd = nullptr;
@@ -15100,7 +15150,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
assert(info.compRetNativeType != TYP_VOID &&
- (fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals));
+ (fgMoreThanOneReturnBlock() || impInlineInfo->HasGcRefLocals()));
// This is a bit of a workaround...
// If we are inlining a call that returns a struct, where the actual "native" return type is
@@ -15181,8 +15231,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
// compRetNativeType is TYP_STRUCT.
// This implies that struct return via RetBuf arg or multi-reg struct return
- GenTreePtr iciCall = impInlineInfo->iciCall;
- assert(iciCall->gtOper == GT_CALL);
+ GenTreeCall* iciCall = impInlineInfo->iciCall->AsCall();
// Assign the inlinee return into a spill temp.
// spill temp only exists if there are multiple return points
@@ -15191,7 +15240,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
// in this case we have to insert multiple struct copies to the temp
// and the retexpr is just the temp.
assert(info.compRetNativeType != TYP_VOID);
- assert(fgMoreThanOneReturnBlock() || impInlineInfo->hasPinnedLocals);
+ assert(fgMoreThanOneReturnBlock() || impInlineInfo->HasGcRefLocals());
impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
(unsigned)CHECK_SPILL_ALL);
@@ -15246,7 +15295,7 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
if (retRegCount != 0)
{
- assert(!iciCall->AsCall()->HasRetBufArg());
+ assert(!iciCall->HasRetBufArg());
assert(retRegCount >= 2);
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
@@ -15265,8 +15314,8 @@ bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE&
else
#endif // defined(_TARGET_ARM64_)
{
- assert(iciCall->AsCall()->HasRetBufArg());
- GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
+ assert(iciCall->HasRetBufArg());
+ GenTreePtr dest = gtCloneExpr(iciCall->gtCallArgs->gtOp.gtOp1);
// spill temp only exists if there are multiple return points
if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
{
@@ -15946,11 +15995,11 @@ SPILLSTACK:
}
else
{
- assert(addTree->gtOper == GT_SWITCH && genActualType(addTree->gtOp.gtOp1->gtType) == TYP_I_IMPL);
+ assert(addTree->gtOper == GT_SWITCH && genActualTypeIsIntOrI(addTree->gtOp.gtOp1->TypeGet()));
unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt SWITCH"));
impAssignTempGen(temp, addTree->gtOp.gtOp1, level);
- addTree->gtOp.gtOp1 = gtNewLclvNode(temp, TYP_I_IMPL);
+ addTree->gtOp.gtOp1 = gtNewLclvNode(temp, genActualType(addTree->gtOp.gtOp1->TypeGet()));
}
}
@@ -16921,7 +16970,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
{
frequency = InlineCallsiteFrequency::LOOP;
}
- else if ((pInlineInfo->iciBlock->bbFlags & BBF_PROF_WEIGHT) && (pInlineInfo->iciBlock->bbWeight > BB_ZERO_WEIGHT))
+ else if (pInlineInfo->iciBlock->hasProfileWeight() && (pInlineInfo->iciBlock->bbWeight > BB_ZERO_WEIGHT))
{
frequency = InlineCallsiteFrequency::WARM;
}
@@ -17378,7 +17427,8 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
// Ignore the type context argument
if (hasTypeCtxtArg && (argCnt == typeCtxtArg))
{
- typeCtxtArg = 0xFFFFFFFF;
+ pInlineInfo->typeContextArg = typeCtxtArg;
+ typeCtxtArg = 0xFFFFFFFF;
continue;
}
@@ -17621,6 +17671,11 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
lclVarInfo[i + argCnt].lclIsPinned = isPinned;
lclVarInfo[i + argCnt].lclTypeInfo = type;
+ if (varTypeIsGC(type))
+ {
+ pInlineInfo->numberOfGcRefLocals++;
+ }
+
if (isPinned)
{
// Pinned locals may cause inlines to fail.
@@ -17685,6 +17740,23 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
#endif // FEATURE_SIMD
}
+//------------------------------------------------------------------------
+// impInlineFetchLocal: get a local var that represents an inlinee local
+//
+// Arguments:
+// lclNum -- number of the inlinee local
+// reason -- debug string describing purpose of the local var
+//
+// Returns:
+// Number of the local to use
+//
+// Notes:
+// This method is invoked only for locals actually used in the
+// inlinee body.
+//
+// Allocates a new temp if necessary, and copies key properties
+// over from the inlinee local var info.
+
unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason))
{
assert(compIsForInlining());
@@ -17693,107 +17765,144 @@ unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reas
if (tmpNum == BAD_VAR_NUM)
{
- var_types lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+ const InlLclVarInfo& inlineeLocal = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt];
+ const var_types lclTyp = inlineeLocal.lclTypeInfo;
// The lifetime of this local might span multiple BBs.
// So it is a long lifetime local.
impInlineInfo->lclTmpNum[lclNum] = tmpNum = lvaGrabTemp(false DEBUGARG(reason));
- lvaTable[tmpNum].lvType = lclTyp;
- if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclHasLdlocaOp)
- {
- lvaTable[tmpNum].lvHasLdAddrOp = 1;
- }
+ // Copy over key info
+ lvaTable[tmpNum].lvType = lclTyp;
+ lvaTable[tmpNum].lvHasLdAddrOp = inlineeLocal.lclHasLdlocaOp;
+ lvaTable[tmpNum].lvPinned = inlineeLocal.lclIsPinned;
+ lvaTable[tmpNum].lvHasILStoreOp = inlineeLocal.lclHasStlocOp;
+ lvaTable[tmpNum].lvHasMultipleILStoreOp = inlineeLocal.lclHasMultipleStlocOp;
- if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclIsPinned)
+ // Copy over class handle for ref types. Note this may be a
+ // shared type -- someday perhaps we can get the exact
+ // signature and pass in a more precise type.
+ if (lclTyp == TYP_REF)
{
- lvaTable[tmpNum].lvPinned = 1;
-
- if (!impInlineInfo->hasPinnedLocals)
- {
- // If the inlinee returns a value, use a spill temp
- // for the return value to ensure that even in case
- // where the return expression refers to one of the
- // pinned locals, we can unpin the local right after
- // the inlined method body.
- if ((info.compRetNativeType != TYP_VOID) && (lvaInlineeReturnSpillTemp == BAD_VAR_NUM))
- {
- lvaInlineeReturnSpillTemp =
- lvaGrabTemp(false DEBUGARG("Inline candidate pinned local return spill temp"));
- lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
- }
- }
-
- impInlineInfo->hasPinnedLocals = true;
+ lvaSetClass(tmpNum, inlineeLocal.lclVerTypeInfo.GetClassHandleForObjRef());
}
- if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct())
+ if (inlineeLocal.lclVerTypeInfo.IsStruct())
{
if (varTypeIsStruct(lclTyp))
{
- lvaSetStruct(tmpNum,
- impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(),
- true /* unsafe value cls check */);
+ lvaSetStruct(tmpNum, inlineeLocal.lclVerTypeInfo.GetClassHandle(), true /* unsafe value cls check */);
}
else
{
// This is a wrapped primitive. Make sure the verstate knows that
- lvaTable[tmpNum].lvVerTypeInfo =
- impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo;
+ lvaTable[tmpNum].lvVerTypeInfo = inlineeLocal.lclVerTypeInfo;
}
}
+
+#ifdef DEBUG
+ // Sanity check that we're properly prepared for gc ref locals.
+ if (varTypeIsGC(lclTyp))
+ {
+ // Since there are gc locals we should have seen them earlier,
+ // and if there is a return value, the spill temp should already be set up.
+ assert(impInlineInfo->HasGcRefLocals());
+ assert((info.compRetNativeType == TYP_VOID) || (lvaInlineeReturnSpillTemp != BAD_VAR_NUM));
+ }
+ else
+ {
+ // Pinned locals are always gc refs, so a non-gc local must not be pinned.
+ assert(!inlineeLocal.lclIsPinned);
+ }
+#endif // DEBUG
}
return tmpNum;
}
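
A minimal standalone sketch of the lazy mapping pattern used by impInlineFetchLocal above. InlineState, fetchLocal, and nextTemp are simplified stand-ins for the JIT's lclTmpNum table and lvaGrabTemp, not the actual implementation; the point is only that a temp is allocated on first use of an inlinee local and reused for every later reference.

#include <cstdio>
#include <vector>

constexpr unsigned BAD_VAR_NUM = 0xFFFFFFFF;

struct InlineState
{
    std::vector<unsigned> lclTmpNum; // inlinee local# -> temp#, BAD_VAR_NUM if not yet used
    unsigned              nextTemp = 0;

    explicit InlineState(size_t localCount) : lclTmpNum(localCount, BAD_VAR_NUM)
    {
    }

    unsigned fetchLocal(unsigned lclNum)
    {
        unsigned tmpNum = lclTmpNum[lclNum];
        if (tmpNum == BAD_VAR_NUM)
        {
            // First use: allocate a temp and copy key properties over (elided here).
            tmpNum            = nextTemp++;
            lclTmpNum[lclNum] = tmpNum;
        }
        return tmpNum;
    }
};

int main()
{
    InlineState state(3);
    std::printf("local 1 -> temp %u\n", state.fetchLocal(1));
    std::printf("local 1 -> temp %u (reused)\n", state.fetchLocal(1));
    return 0;
}
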
-// A method used to return the GenTree (usually a GT_LCL_VAR) representing the arguments of the inlined method.
-// Only use this method for the arguments of the inlinee method.
-// !!! Do not use it for the locals of the inlinee method. !!!!
+//------------------------------------------------------------------------
+// impInlineFetchArg: return tree node for argument value in an inlinee
+//
+// Arguments:
+// lclNum -- argument number in inlinee IL
+// inlArgInfo -- argument info for inlinee
+// lclVarInfo -- var info for inlinee
+//
+// Returns:
+// Tree for the argument's value. Often an inlinee-scoped temp
+// GT_LCL_VAR but can be other tree kinds, if the argument
+// expression from the caller can be directly substituted into the
+// inlinee body.
+//
+// Notes:
+// Must be used only for arguments -- use impInlineFetchLocal for
+// inlinee locals.
+//
+// Direct substitution is performed when the formal argument cannot
+// change value in the inlinee body (no starg or ldarga), and the
+// actual argument expression's value cannot be changed if it is
+// substituted into the inlinee body.
+//
+// Even if an inlinee-scoped temp is returned here, it may later be
+// "bashed" to a caller-supplied tree when arguments are actually
+// passed (see fgInlinePrependStatements). Bashing can happen if
+// the argument ends up being single use and other conditions are
+// met. So the contents of the tree returned here may not end up
+// being the ones ultimately used for the argument.
+//
+// This method modifies inlArgInfo. It should only be called
+// for actual uses of the argument in the inlinee.
GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclVarInfo)
{
- /* Get the argument type */
- var_types lclTyp = lclVarInfo[lclNum].lclTypeInfo;
+ // Cache the relevant arg and lcl info for this argument.
+ // We will modify argInfo but not lclVarInfo.
+ InlArgInfo& argInfo = inlArgInfo[lclNum];
+ const InlLclVarInfo& lclInfo = lclVarInfo[lclNum];
+ const bool argCanBeModified = argInfo.argHasLdargaOp || argInfo.argHasStargOp;
+ const var_types lclTyp = lclInfo.lclTypeInfo;
+ GenTreePtr op1 = nullptr;
- GenTreePtr op1 = nullptr;
-
- // constant or address of local
- if (inlArgInfo[lclNum].argIsInvariant && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ if (argInfo.argIsInvariant && !argCanBeModified)
{
- /* Clone the constant. Note that we cannot directly use argNode
- in the trees even if inlArgInfo[lclNum].argIsUsed==false as this
- would introduce aliasing between inlArgInfo[].argNode and
- impInlineExpr. Then gtFoldExpr() could change it, causing further
- references to the argument working off of the bashed copy. */
-
- op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ // Directly substitute constants or addresses of locals
+ //
+ // Clone the constant. Note that we cannot directly use
+ // argNode in the trees even if !argInfo.argIsUsed as this
+ // would introduce aliasing between inlArgInfo[].argNode and
+ // impInlineExpr. Then gtFoldExpr() could change it, causing
+ // further references to the argument working off of the
+ // bashed copy.
+ op1 = gtCloneExpr(argInfo.argNode);
PREFIX_ASSUME(op1 != nullptr);
- inlArgInfo[lclNum].argTmpNum = (unsigned)-1; // illegal temp
+ argInfo.argTmpNum = BAD_VAR_NUM;
}
- else if (inlArgInfo[lclNum].argIsLclVar && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ else if (argInfo.argIsLclVar && !argCanBeModified)
{
- /* Argument is a local variable (of the caller)
- * Can we re-use the passed argument node? */
-
- op1 = inlArgInfo[lclNum].argNode;
- inlArgInfo[lclNum].argTmpNum = op1->gtLclVarCommon.gtLclNum;
+ // Directly substitute caller locals
+ //
+ // Use the caller-supplied node if this is the first use.
+ op1 = argInfo.argNode;
+ argInfo.argTmpNum = op1->gtLclVarCommon.gtLclNum;
- if (inlArgInfo[lclNum].argIsUsed)
+ // Use an equivalent copy if this is the second or subsequent use.
+ if (argInfo.argIsUsed)
{
assert(op1->gtOper == GT_LCL_VAR);
assert(lclNum == op1->gtLclVar.gtLclILoffs);
+ var_types newTyp = lclTyp;
+
if (!lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad())
{
- lclTyp = genActualType(lclTyp);
+ newTyp = genActualType(lclTyp);
}
- /* Create a new lcl var node - remember the argument lclNum */
- op1 = gtNewLclvNode(op1->gtLclVarCommon.gtLclNum, lclTyp, op1->gtLclVar.gtLclILoffs);
+ // Create a new lcl var node - remember the argument lclNum
+ op1 = gtNewLclvNode(op1->gtLclVarCommon.gtLclNum, newTyp, op1->gtLclVar.gtLclILoffs);
}
}
- else if (inlArgInfo[lclNum].argIsByRefToStructLocal && !inlArgInfo[lclNum].argHasStargOp)
+ else if (argInfo.argIsByRefToStructLocal && !argInfo.argHasStargOp)
{
/* Argument is a by-ref address to a struct, a normed struct, or its field.
In these cases, don't spill the byref to a local, simply clone the tree and use it.
@@ -17812,59 +17921,65 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo,
then we change the argument tree (of "ldloca.s V_1") to TYP_I_IMPL to match the callee signature. We'll
soon afterwards reject the inlining anyway, since the tree we return isn't a GT_LCL_VAR.
*/
- assert(inlArgInfo[lclNum].argNode->TypeGet() == TYP_BYREF ||
- inlArgInfo[lclNum].argNode->TypeGet() == TYP_I_IMPL);
- op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ assert(argInfo.argNode->TypeGet() == TYP_BYREF || argInfo.argNode->TypeGet() == TYP_I_IMPL);
+ op1 = gtCloneExpr(argInfo.argNode);
}
else
{
/* Argument is a complex expression - it must be evaluated into a temp */
- if (inlArgInfo[lclNum].argHasTmp)
+ if (argInfo.argHasTmp)
{
- assert(inlArgInfo[lclNum].argIsUsed);
- assert(inlArgInfo[lclNum].argTmpNum < lvaCount);
+ assert(argInfo.argIsUsed);
+ assert(argInfo.argTmpNum < lvaCount);
/* Create a new lcl var node - remember the argument lclNum */
- op1 = gtNewLclvNode(inlArgInfo[lclNum].argTmpNum, genActualType(lclTyp));
+ op1 = gtNewLclvNode(argInfo.argTmpNum, genActualType(lclTyp));
/* This is the second or later use of the this argument,
so we have to use the temp (instead of the actual arg) */
- inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ argInfo.argBashTmpNode = nullptr;
}
else
{
/* First time use */
- assert(inlArgInfo[lclNum].argIsUsed == false);
+ assert(!argInfo.argIsUsed);
/* Reserve a temp for the expression.
* Use a large size node as we may change it later */
- unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Inlining Arg"));
+ const unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Inlining Arg"));
lvaTable[tmpNum].lvType = lclTyp;
+
+ // Copy over class handle for ref types. Note this may be
+ // further improved if it is a shared type and we know the exact context.
+ if (lclTyp == TYP_REF)
+ {
+ lvaSetClass(tmpNum, lclInfo.lclVerTypeInfo.GetClassHandleForObjRef());
+ }
+
assert(lvaTable[tmpNum].lvAddrExposed == 0);
- if (inlArgInfo[lclNum].argHasLdargaOp)
+ if (argInfo.argHasLdargaOp)
{
lvaTable[tmpNum].lvHasLdAddrOp = 1;
}
- if (lclVarInfo[lclNum].lclVerTypeInfo.IsStruct())
+ if (lclInfo.lclVerTypeInfo.IsStruct())
{
if (varTypeIsStruct(lclTyp))
{
- lvaSetStruct(tmpNum, impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo.GetClassHandle(),
- true /* unsafe value cls check */);
+ lvaSetStruct(tmpNum, lclInfo.lclVerTypeInfo.GetClassHandle(), true /* unsafe value cls check */);
}
else
{
// This is a wrapped primitive. Make sure the verstate knows that
- lvaTable[tmpNum].lvVerTypeInfo = impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo;
+ lvaTable[tmpNum].lvVerTypeInfo = lclInfo.lclVerTypeInfo;
}
}
- inlArgInfo[lclNum].argHasTmp = true;
- inlArgInfo[lclNum].argTmpNum = tmpNum;
+ argInfo.argHasTmp = true;
+ argInfo.argTmpNum = tmpNum;
// If we require strict exception order, then arguments must
// be evaluated in sequence before the body of the inlined method.
@@ -17875,7 +17990,7 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo,
// TODO-1stClassStructs: We currently do not reuse an existing lclVar
// if it is a struct, because it requires some additional handling.
- if (!varTypeIsStruct(lclTyp) && (!inlArgInfo[lclNum].argHasSideEff) && (!inlArgInfo[lclNum].argHasGlobRef))
+ if (!varTypeIsStruct(lclTyp) && !argInfo.argHasSideEff && !argInfo.argHasGlobRef)
{
/* Get a *LARGE* LCL_VAR node */
op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp), lclNum);
@@ -17884,21 +17999,20 @@ GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo,
If there are no further uses of the arg, we may be
able to use the actual arg node instead of the temp.
If we do see any further uses, we will clear this. */
- inlArgInfo[lclNum].argBashTmpNode = op1;
+ argInfo.argBashTmpNode = op1;
}
else
{
/* Get a small LCL_VAR node */
op1 = gtNewLclvNode(tmpNum, genActualType(lclTyp));
/* No bashing of this argument */
- inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ argInfo.argBashTmpNode = nullptr;
}
}
}
- /* Mark the argument as used */
-
- inlArgInfo[lclNum].argIsUsed = true;
+ // Mark this argument as used.
+ argInfo.argIsUsed = true;
return op1;
}
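
A simplified, standalone sketch of the substitution policy implemented by impInlineFetchArg. ArgInfo, ArgStrategy, and chooseStrategy are illustrative stand-ins (the byref-to-struct-local case and the temp bookkeeping are omitted): invariant arguments and unmodified caller locals are substituted directly, and everything else is spilled to an inlinee temp.

#include <cstdio>

struct ArgInfo
{
    bool isInvariant;
    bool isLclVar;
    bool hasLdargaOp;
    bool hasStargOp;
};

enum class ArgStrategy
{
    CloneInvariant, // clone the constant / address-of-local
    UseCallerLocal, // reuse (or re-create) the caller's LCL_VAR node
    SpillToTemp     // complex expression: evaluate into an inlinee temp
};

static ArgStrategy chooseStrategy(const ArgInfo& arg)
{
    const bool canBeModified = arg.hasLdargaOp || arg.hasStargOp;
    if (arg.isInvariant && !canBeModified)
    {
        return ArgStrategy::CloneInvariant;
    }
    if (arg.isLclVar && !canBeModified)
    {
        return ArgStrategy::UseCallerLocal;
    }
    return ArgStrategy::SpillToTemp;
}

int main()
{
    ArgInfo constant{true, false, false, false};
    ArgInfo mutatedLocal{false, true, false, true};
    std::printf("constant arg -> %d\n", static_cast<int>(chooseStrategy(constant)));
    std::printf("mutated local arg -> %d\n", static_cast<int>(chooseStrategy(mutatedLocal)));
    return 0;
}
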
@@ -17977,16 +18091,28 @@ BOOL Compiler::impInlineIsGuaranteedThisDerefBeforeAnySideEffects(GenTreePtr ad
return TRUE;
}
-/******************************************************************************/
-// Check the inlining eligibility of this GT_CALL node.
-// Mark GTF_CALL_INLINE_CANDIDATE on the GT_CALL node
-
-// Todo: find a way to record the failure reasons in the IR (or
-// otherwise build tree context) so when we do the inlining pass we
-// can capture these reasons
+//------------------------------------------------------------------------
+// impMarkInlineCandidate: determine if this call can be subsequently inlined
+//
+// Arguments:
+// callNode -- call under scrutiny
+// exactContextHnd -- context handle for inlining
+// exactContextNeedsRuntimeLookup -- true if context required runtime lookup
+// callInfo -- call info from VM
+//
+// Notes:
+// If callNode is an inline candidate, this method sets the flag
+// GTF_CALL_INLINE_CANDIDATE, and ensures that helper methods have
+// filled in the associated InlineCandidateInfo.
+//
+// If callNode is not an inline candidate, and the reason is
+// something that is inherent to the method being called, the
+// method may be marked as "noinline" to short-circuit any
+// future assessments of calls to this method.
void Compiler::impMarkInlineCandidate(GenTreePtr callNode,
CORINFO_CONTEXT_HANDLE exactContextHnd,
+ bool exactContextNeedsRuntimeLookup,
CORINFO_CALL_INFO* callInfo)
{
// Let the strategy know there's another call
@@ -18172,6 +18298,10 @@ void Compiler::impMarkInlineCandidate(GenTreePtr callNode,
// The old value should be NULL
assert(call->gtInlineCandidateInfo == nullptr);
+ // The new value should not be NULL.
+ assert(inlineCandidateInfo != nullptr);
+ inlineCandidateInfo->exactContextNeedsRuntimeLookup = exactContextNeedsRuntimeLookup;
+
call->gtInlineCandidateInfo = inlineCandidateInfo;
// Mark the call node as inline candidate.
@@ -18297,4 +18427,335 @@ bool Compiler::IsMathIntrinsic(GenTreePtr tree)
{
return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->gtIntrinsic.gtIntrinsicId);
}
-/*****************************************************************************/
+
+//------------------------------------------------------------------------
+// impDevirtualizeCall: Attempt to change a virtual vtable call into a
+// normal call
+//
+// Arguments:
+// call -- the call node to examine/modify
+// thisObj -- the value of 'this' for the call
+// callInfo -- [IN/OUT] info about the call from the VM
+// exactContextHandle -- [OUT] updated context handle iff call devirtualized
+//
+// Notes:
+// Virtual calls in IL will always "invoke" the base class method.
+//
+// This transformation looks for evidence that the type of 'this'
+// in the call is exactly known, is a final class or would invoke
+// a final method, and if that and other safety checks pan out,
+// modifies the call and the call info to create a direct call.
+//
+// This transformation is initially done in the importer and not
+// in some subsequent optimization pass because we want it to be
+// upstream of inline candidate identification.
+//
+// However, later phases may supply improved type information that
+// can enable further devirtualization. We currently reinvoke this
+// code after inlining, if the return value of the inlined call is
+// the 'this obj' of a subsequent virtual call.
+//
+void Compiler::impDevirtualizeCall(GenTreeCall* call,
+ GenTreePtr thisObj,
+ CORINFO_CALL_INFO* callInfo,
+ CORINFO_CONTEXT_HANDLE* exactContextHandle)
+{
+ // This should be a virtual vtable or virtual stub call.
+ assert(call->IsVirtual());
+
+ // Bail if not optimizing
+ if (opts.MinOpts())
+ {
+ return;
+ }
+
+ // Bail if debuggable codegen
+ if (opts.compDbgCode)
+ {
+ return;
+ }
+
+#if defined(DEBUG)
+ // Bail if devirt is disabled.
+ if (JitConfig.JitEnableDevirtualization() == 0)
+ {
+ return;
+ }
+
+ const bool doPrint = JitConfig.JitPrintDevirtualizedMethods() == 1;
+#endif // DEBUG
+
+ // Fetch information about the virtual method we're calling.
+ CORINFO_METHOD_HANDLE baseMethod = callInfo->hMethod;
+ unsigned baseMethodAttribs = callInfo->methodFlags;
+
+ if (baseMethodAttribs == 0)
+ {
+ // For late devirt we may not have method attributes, so fetch them.
+ baseMethodAttribs = info.compCompHnd->getMethodAttribs(baseMethod);
+ }
+ else
+ {
+#if defined(DEBUG)
+ // Validate that callInfo has up to date method flags
+ const DWORD freshBaseMethodAttribs = info.compCompHnd->getMethodAttribs(baseMethod);
+ assert(freshBaseMethodAttribs == baseMethodAttribs);
+#endif // DEBUG
+ }
+
+ // In R2R mode, we might see virtual stub calls to
+ // non-virtuals. For instance, cases where the non-virtual method
+ // is in a different assembly but is called via CALLVIRT. For
+ // version resilience we must allow for the fact that the method
+ // might become virtual in some update.
+ //
+ // In non-R2R modes CALLVIRT <nonvirtual> will be turned into a
+ // regular call+nullcheck upstream, so we won't reach this
+ // point.
+ if ((baseMethodAttribs & CORINFO_FLG_VIRTUAL) == 0)
+ {
+ assert(call->IsVirtualStub());
+ assert(opts.IsReadyToRun());
+ JITDUMP("\nimpDevirtualizeCall: [R2R] base method not virtual, sorry\n");
+ return;
+ }
+
+ // See what we know about the type of 'this' in the call.
+ bool isExact = false;
+ bool objIsNonNull = false;
+ CORINFO_CLASS_HANDLE objClass = gtGetClassHandle(thisObj, &isExact, &objIsNonNull);
+
+ // Bail if we know nothing.
+ if (objClass == nullptr)
+ {
+ JITDUMP("\nimpDevirtualizeCall: no type available (op=%s)\n", GenTree::OpName(thisObj->OperGet()));
+ return;
+ }
+
+ // Fetch information about the class that introduced the virtual method.
+ CORINFO_CLASS_HANDLE baseClass = info.compCompHnd->getMethodClass(baseMethod);
+ const DWORD baseClassAttribs = info.compCompHnd->getClassAttribs(baseClass);
+
+#if !defined(FEATURE_CORECLR)
+ // If base class is not beforefieldinit then devirtualizing may
+ // cause us to miss a base class init trigger. Spec says we don't
+ // need a trigger for ref class callvirts but desktop seems to
+ // have one anyway. So defer.
+ if ((baseClassAttribs & CORINFO_FLG_BEFOREFIELDINIT) == 0)
+ {
+ JITDUMP("\nimpDevirtualizeCall: base class has precise initialization, sorry\n");
+ return;
+ }
+#endif // FEATURE_CORECLR
+
+ // Is the call an interface call?
+ const bool isInterface = (baseClassAttribs & CORINFO_FLG_INTERFACE) != 0;
+
+ // If the objClass is sealed (final), then we may be able to devirtualize.
+ const DWORD objClassAttribs = info.compCompHnd->getClassAttribs(objClass);
+ const bool objClassIsFinal = (objClassAttribs & CORINFO_FLG_FINAL) != 0;
+
+#if defined(DEBUG)
+ const char* callKind = isInterface ? "interface" : "virtual";
+ const char* objClassNote = "[?]";
+ const char* objClassName = "?objClass";
+ const char* baseClassName = "?baseClass";
+ const char* baseMethodName = "?baseMethod";
+
+ if (verbose || doPrint)
+ {
+ objClassNote = isExact ? " [exact]" : objClassIsFinal ? " [final]" : "";
+ objClassName = info.compCompHnd->getClassName(objClass);
+ baseClassName = info.compCompHnd->getClassName(baseClass);
+ baseMethodName = eeGetMethodName(baseMethod, nullptr);
+
+ if (verbose)
+ {
+ printf("\nimpDevirtualizeCall: Trying to devirtualize %s call:\n"
+ " class for 'this' is %s%s (attrib %08x)\n"
+ " base method is %s::%s\n",
+ callKind, objClassName, objClassNote, objClassAttribs, baseClassName, baseMethodName);
+ }
+ }
+#endif // defined(DEBUG)
+
+ // Bail if obj class is an interface.
+ // See for instance System.ValueTuple`8::GetHashCode, where lcl 0 is System.IValueTupleInternal
+ // IL_021d: ldloc.0
+ // IL_021e: callvirt instance int32 System.Object::GetHashCode()
+ if ((objClassAttribs & CORINFO_FLG_INTERFACE) != 0)
+ {
+ JITDUMP("--- obj class is interface, sorry\n");
+ return;
+ }
+
+ if (isInterface)
+ {
+ assert(call->IsVirtualStub());
+ JITDUMP("--- base class is interface\n");
+ }
+
+ // Fetch the method that would be called based on the declared type of 'this'
+ CORINFO_CONTEXT_HANDLE ownerType = callInfo->contextHandle;
+ CORINFO_METHOD_HANDLE derivedMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, objClass, ownerType);
+
+ // If we failed to get a handle, we can't devirtualize. This can
+ // happen when prejitting, if the devirtualization crosses
+ // servicing bubble boundaries.
+ if (derivedMethod == nullptr)
+ {
+ JITDUMP("--- no derived method, sorry\n");
+ return;
+ }
+
+ // Fetch method attributes to see if method is marked final.
+ const DWORD derivedMethodAttribs = info.compCompHnd->getMethodAttribs(derivedMethod);
+ const bool derivedMethodIsFinal = ((derivedMethodAttribs & CORINFO_FLG_FINAL) != 0);
+
+#if defined(DEBUG)
+ const char* derivedClassName = "?derivedClass";
+ const char* derivedMethodName = "?derivedMethod";
+
+ const char* note = "speculative";
+ if (isExact)
+ {
+ note = "exact";
+ }
+ else if (objClassIsFinal)
+ {
+ note = "final class";
+ }
+ else if (derivedMethodIsFinal)
+ {
+ note = "final method";
+ }
+
+ if (verbose || doPrint)
+ {
+ derivedMethodName = eeGetMethodName(derivedMethod, &derivedClassName);
+ if (verbose)
+ {
+ printf(" devirt to %s::%s -- %s\n", derivedClassName, derivedMethodName, note);
+ gtDispTree(call);
+ }
+ }
+#endif // defined(DEBUG)
+
+ if (!isExact && !objClassIsFinal && !derivedMethodIsFinal)
+ {
+ // Type is not exact, and neither class nor method is final.
+ //
+ // We could speculatively devirtualize, but there's no
+ // reason to believe the derived method is the one that
+ // is likely to be invoked.
+ //
+ // If there's currently no further overriding (that is, at
+ // the time of jitting, objClass has no subclasses that
+ // override this method), then perhaps we'd be willing to
+ // make a bet...?
+ JITDUMP(" Class not final or exact, method not final, no devirtualization\n");
+ return;
+ }
+
+ // For interface calls we must have an exact type or final class.
+ if (isInterface && !isExact && !objClassIsFinal)
+ {
+ JITDUMP(" Class not final or exact for interface, no devirtualization\n");
+ return;
+ }
+
+ JITDUMP(" %s; can devirtualize\n", note);
+
+ // Make the updates.
+ call->gtFlags &= ~GTF_CALL_VIRT_VTABLE;
+ call->gtFlags &= ~GTF_CALL_VIRT_STUB;
+ call->gtCallMethHnd = derivedMethod;
+ call->gtCallType = CT_USER_FUNC;
+
+ // Virtual calls include an implicit null check, which we may
+ // now need to make explicit.
+ if (!objIsNonNull)
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+
+ // Clear the inline candidate info (may be non-null since
+ // it's a union field used for other things by virtual
+ // stubs)
+ call->gtInlineCandidateInfo = nullptr;
+
+ // Fetch the class that introduced the derived method.
+ //
+ // Note this may not equal objClass, if there is a
+ // final method that objClass inherits.
+ CORINFO_CLASS_HANDLE derivedClass = info.compCompHnd->getMethodClass(derivedMethod);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // For R2R, getCallInfo triggers bookkeeping on the zap
+ // side so we need to call it here.
+ //
+ // First, cons up a suitable resolved token.
+ CORINFO_RESOLVED_TOKEN derivedResolvedToken = {};
+
+ derivedResolvedToken.tokenScope = info.compScopeHnd;
+ derivedResolvedToken.tokenContext = callInfo->contextHandle;
+ derivedResolvedToken.token = info.compCompHnd->getMethodDefFromMethod(derivedMethod);
+ derivedResolvedToken.tokenType = CORINFO_TOKENKIND_Method;
+ derivedResolvedToken.hClass = derivedClass;
+ derivedResolvedToken.hMethod = derivedMethod;
+
+ // Look up the new call info.
+ CORINFO_CALL_INFO derivedCallInfo;
+ eeGetCallInfo(&derivedResolvedToken, nullptr, addVerifyFlag(CORINFO_CALLINFO_ALLOWINSTPARAM), &derivedCallInfo);
+
+ // Update the call.
+ call->gtCallMoreFlags &= ~GTF_CALL_M_VIRTSTUB_REL_INDIRECT;
+ call->gtCallMoreFlags &= ~GTF_CALL_M_R2R_REL_INDIRECT;
+ call->setEntryPoint(derivedCallInfo.codePointerLookup.constLookup);
+ }
+#endif // FEATURE_READYTORUN_COMPILER
+
+ // Need to update call info too. This is fragile
+ // but hopefully the derived method conforms to
+ // the base in most other ways.
+ callInfo->hMethod = derivedMethod;
+ callInfo->methodFlags = derivedMethodAttribs;
+ callInfo->contextHandle = MAKE_METHODCONTEXT(derivedMethod);
+
+ // Update context handle.
+ if ((exactContextHandle != nullptr) && (*exactContextHandle != nullptr))
+ {
+ *exactContextHandle = MAKE_METHODCONTEXT(derivedMethod);
+ }
+
+#if defined(DEBUG)
+ if (verbose)
+ {
+ printf("... after devirt...\n");
+ gtDispTree(call);
+ }
+
+ if (doPrint)
+ {
+ printf("Devirtualized %s call to %s:%s; now direct call to %s:%s [%s]\n", callKind, baseClassName,
+ baseMethodName, derivedClassName, derivedMethodName, note);
+ }
+#endif // defined(DEBUG)
+}
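
A standalone sketch of the guard conditions checked above. DevirtFacts and canDevirtualize are illustrative stand-ins for the class/method attribute queries: devirtualization requires an exact 'this' type, a final class, or a final method, and interface calls additionally require an exact type or final class.

#include <cstdio>

struct DevirtFacts
{
    bool isExactType;
    bool objClassIsFinal;
    bool derivedMethodIsFinal;
    bool isInterfaceCall;
};

static bool canDevirtualize(const DevirtFacts& f)
{
    if (!f.isExactType && !f.objClassIsFinal && !f.derivedMethodIsFinal)
    {
        return false; // nothing pins down which method runs at run time
    }
    if (f.isInterfaceCall && !f.isExactType && !f.objClassIsFinal)
    {
        return false; // a final method alone is not enough for interface dispatch
    }
    return true;
}

int main()
{
    DevirtFacts finalMethodOnVirtual{false, false, true, false};
    DevirtFacts finalMethodOnInterface{false, false, true, true};
    std::printf("virtual call, final method: %s\n",
                canDevirtualize(finalMethodOnVirtual) ? "devirtualize" : "keep virtual");
    std::printf("interface call, final method: %s\n",
                canDevirtualize(finalMethodOnInterface) ? "devirtualize" : "keep virtual");
    return 0;
}
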
+
+//------------------------------------------------------------------------
+// impAllocateToken: create a CORINFO_RESOLVED_TOKEN in jit-allocated memory and initialize it.
+//
+// Arguments:
+// token - init value for the allocated token.
+//
+// Return Value:
+// pointer to the token in jit-allocated memory.
+CORINFO_RESOLVED_TOKEN* Compiler::impAllocateToken(CORINFO_RESOLVED_TOKEN token)
+{
+ CORINFO_RESOLVED_TOKEN* memory = (CORINFO_RESOLVED_TOKEN*)compGetMem(sizeof(token));
+ *memory = token;
+ return memory;
+}
diff --git a/src/jit/inline.def b/src/jit/inline.def
index 2a6f5a3f73..ff0b21100e 100644
--- a/src/jit/inline.def
+++ b/src/jit/inline.def
@@ -39,7 +39,6 @@ INLINE_OBSERVATION(HAS_LEAVE, bool, "has leave",
INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
-INLINE_OBSERVATION(HAS_NOINLINE_CALLEE, bool, "in corelib, noinline callee", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
@@ -56,6 +55,7 @@ INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check",
INLINE_OBSERVATION(NO_METHOD_INFO, bool, "cannot get method info", FATAL, CALLEE)
INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLEE)
INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLEE)
+INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", FATAL, CALLEE)
INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE)
INLINE_OBSERVATION(THROW_WITH_INVALID_STACK, bool, "throw with invalid stack", FATAL, CALLEE)
INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE)
diff --git a/src/jit/inline.h b/src/jit/inline.h
index 2634ebe6fa..ee07130676 100644
--- a/src/jit/inline.h
+++ b/src/jit/inline.h
@@ -506,6 +506,7 @@ struct InlineCandidateInfo
var_types fncRetType;
CORINFO_METHOD_HANDLE ilCallerHandle; // the logical IL caller of this inlinee.
CORINFO_CONTEXT_HANDLE exactContextHnd;
+ bool exactContextNeedsRuntimeLookup;
CorInfoInitClassResult initClassResult;
};
@@ -513,31 +514,32 @@ struct InlineCandidateInfo
struct InlArgInfo
{
- unsigned argIsUsed : 1; // is this arg used at all?
- unsigned argIsInvariant : 1; // the argument is a constant or a local variable address
- unsigned argIsLclVar : 1; // the argument is a local variable
- unsigned argIsThis : 1; // the argument is the 'this' pointer
- unsigned argHasSideEff : 1; // the argument has side effects
- unsigned argHasGlobRef : 1; // the argument has a global ref
- unsigned argHasTmp : 1; // the argument will be evaluated to a temp
- unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
- // field in them?
- unsigned argHasLdargaOp : 1; // Is there LDARGA(s) operation on this argument?
- unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
-
- unsigned argTmpNum; // the argument tmp number
- GenTreePtr argNode;
- GenTreePtr argBashTmpNode; // tmp node created, if it may be replaced with actual arg
+ GenTreePtr argNode; // caller node for this argument
+ GenTreePtr argBashTmpNode; // tmp node created, if it may be replaced with actual arg
+ unsigned argTmpNum; // the argument tmp number
+ unsigned argIsUsed : 1; // is this arg used at all?
+ unsigned argIsInvariant : 1; // the argument is a constant or a local variable address
+ unsigned argIsLclVar : 1; // the argument is a local variable
+ unsigned argIsThis : 1; // the argument is the 'this' pointer
+ unsigned argHasSideEff : 1; // the argument has side effects
+ unsigned argHasGlobRef : 1; // the argument has a global ref
+ unsigned argHasTmp : 1; // the argument will be evaluated to a temp
+ unsigned argHasLdargaOp : 1; // Is there LDARGA(s) operation on this argument?
+ unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
+ unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
+ // field in them?
};
-// InlArgInfo describes inline candidate local variable properties.
+// InlLclVarInfo describes inline candidate argument and local variable properties.
struct InlLclVarInfo
{
- var_types lclTypeInfo;
typeInfo lclVerTypeInfo;
- bool lclHasLdlocaOp; // Is there LDLOCA(s) operation on this argument?
- bool lclIsPinned;
+ var_types lclTypeInfo;
+ unsigned lclHasLdlocaOp : 1; // Is there LDLOCA(s) operation on this local?
+ unsigned lclHasStlocOp : 1; // Is there a STLOC on this local?
+ unsigned lclHasMultipleStlocOp : 1; // Is there more than one STLOC on this local
+ unsigned lclIsPinned : 1;
};
// InlineInfo provides detailed information about a particular inline candidate.
@@ -563,8 +565,16 @@ struct InlineInfo
int lclTmpNum[MAX_INL_LCLS]; // map local# -> temp# (-1 if unused)
InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig
- bool thisDereferencedFirst;
- bool hasPinnedLocals;
+ unsigned numberOfGcRefLocals; // Number of TYP_REF and TYP_BYREF locals
+
+ bool HasGcRefLocals() const
+ {
+ return numberOfGcRefLocals > 0;
+ }
+
+ bool thisDereferencedFirst;
+ unsigned typeContextArg;
+
#ifdef FEATURE_SIMD
bool hasSIMDTypeArgLocalOrReturn;
#endif // FEATURE_SIMD
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index 7332ba6c71..e2435cab28 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -797,7 +797,6 @@ void CodeGen::sched_AM(instruction ins,
}
else if (addr->IsCnsIntOrI())
{
-#ifdef RELOC_SUPPORT
// Do we need relocations?
if (compiler->opts.compReloc && addr->IsIconHandle())
{
@@ -806,7 +805,6 @@ void CodeGen::sched_AM(instruction ins,
// so that we can uniquely identify the handle
assert(offs <= 4);
}
-#endif
ssize_t disp = addr->gtIntCon.gtIconVal + offs;
if ((insType == eIT_Store) && (ireg != REG_NA))
{
@@ -1113,7 +1111,6 @@ void CodeGen::sched_AM(instruction ins,
assert(addr->IsCnsIntOrI());
-#ifdef RELOC_SUPPORT
// Do we need relocations?
if (compiler->opts.compReloc && addr->IsIconHandle())
{
@@ -1122,7 +1119,7 @@ void CodeGen::sched_AM(instruction ins,
// so that we can uniquely identify the handle
assert(offs <= 4);
}
-#endif
+
reg = REG_NA;
ssize_t disp = addr->gtIntCon.gtIconVal + offs;
@@ -1251,9 +1248,12 @@ void CodeGen::sched_AM(instruction ins,
* Emit a "call [r/m]" instruction (the r/m operand given by a tree).
*/
-void CodeGen::instEmit_indCall(GenTreePtr call,
+// clang-format off
+void CodeGen::instEmit_indCall(GenTreeCall* call,
size_t argSize,
- emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+ emitAttr retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+// clang-format on
{
GenTreePtr addr;
@@ -1266,18 +1266,16 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
CORINFO_SIG_INFO* sigInfo = nullptr;
- assert(call->gtOper == GT_CALL);
-
/* Get hold of the function address */
- assert(call->gtCall.gtCallType == CT_INDIRECT);
- addr = call->gtCall.gtCallAddr;
+ assert(call->gtCallType == CT_INDIRECT);
+ addr = call->gtCallAddr;
assert(addr);
#ifdef DEBUG
// Pass the call signature information from the GenTree node so the emitter can associate
// native call sites with the signatures they were generated from.
- sigInfo = call->gtCall.callSig;
+ sigInfo = call->callSig;
#endif // DEBUG
#if CPU_LOAD_STORE_ARCH
@@ -1290,11 +1288,19 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
{
ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+ // clang-format off
getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
NULL, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ (void*) funcPtr,
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur);
+ // clang-format on
+
return;
}
}
@@ -1347,11 +1353,19 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
{
ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+ // clang-format off
getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
nullptr, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
- retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ (void*) funcPtr,
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur);
+ // clang-format on
+
return;
}
}
@@ -1386,7 +1400,7 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
INDEBUG(bool yes =)
genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2, &mul, &cns);
- INDEBUG(PREFIX_ASSUME(yes)); // since we have called genMakeAddressable() on call->gtCall.gtCallAddr
+ INDEBUG(PREFIX_ASSUME(yes)); // since we have called genMakeAddressable() on call->gtCallAddr
/* Get the additional operands if any */
@@ -1409,14 +1423,23 @@ void CodeGen::instEmit_indCall(GenTreePtr call,
#endif // CPU_LOAD_STORE_ARCH
+ // clang-format off
getEmitter()->emitIns_Call(emitCallType,
nullptr, // methHnd
- INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
- argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
- gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, // addr
+ argSize,
+ retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
BAD_IL_OFFSET, // ilOffset
- brg, xrg, mul,
+ brg,
+ xrg,
+ mul,
cns); // addressing mode values
+ // clang-format on
}
#ifdef LEGACY_BACKEND
@@ -2326,7 +2349,7 @@ void CodeGen::inst_RV_TT(instruction ins,
#if CPU_LOAD_STORE_ARCH
if (ins == INS_mov)
{
-#if defined(_TARGET_ARM_)
+#if defined(_TARGET_ARM_) && CPU_LONG_USES_REGPAIR
if (tree->TypeGet() != TYP_LONG)
{
ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
@@ -2341,7 +2364,7 @@ void CodeGen::inst_RV_TT(instruction ins,
ins = ins_Move_Extend(TYP_INT,
(tree->gtFlags & GTF_REG_VAL) != 0 && genRegPairHi(tree->gtRegPair) != REG_STK);
}
-#elif defined(_TARGET_ARM64_)
+#elif defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
#else
NYI("CodeGen::inst_RV_TT with INS_mov");
@@ -2485,9 +2508,11 @@ AGAIN:
default:
regNumber regTmp;
#ifndef LEGACY_BACKEND
+#if CPU_LONG_USES_REGPAIR
if (tree->TypeGet() == TYP_LONG)
regTmp = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
else
+#endif // CPU_LONG_USES_REGPAIR
regTmp = tree->gtRegNum;
#else // LEGACY_BACKEND
if (varTypeIsFloating(tree))
@@ -2595,17 +2620,6 @@ AGAIN:
constVal = (ssize_t)(tree->gtLngCon.gtLconVal >> 32);
size = EA_4BYTE;
}
-#ifndef LEGACY_BACKEND
-#ifdef _TARGET_ARM_
- if ((ins != INS_mov) && !arm_Valid_Imm_For_Instr(ins, constVal, flags))
- {
- regNumber constReg = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
- instGen_Set_Reg_To_Imm(size, constReg, constVal);
- getEmitter()->emitIns_R_R(ins, size, reg, constReg, flags);
- break;
- }
-#endif // _TARGET_ARM_
-#endif // !LEGACY_BACKEND
inst_RV_IV(ins, reg, constVal, size, flags);
break;
@@ -3573,9 +3587,13 @@ instruction CodeGen::ins_FloatSqrt(var_types type)
{
ins = INS_sqrtsd;
}
+ else if (type == TYP_FLOAT)
+ {
+ ins = INS_sqrtss;
+ }
else
{
- // Right now sqrt of scalar single is not needed.
+ assert(!"ins_FloatSqrt: Unsupported type");
unreached();
}
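
A tiny standalone sketch of the selection above (the enum values and floatSqrtIns are illustrative stand-ins, not the JIT's own types): scalar single maps to sqrtss and scalar double to sqrtsd, with anything else treated as unsupported.

#include <cassert>
#include <cstdio>

enum instruction { INS_sqrtss, INS_sqrtsd };
enum var_types { TYP_FLOAT, TYP_DOUBLE, TYP_INT };

static instruction floatSqrtIns(var_types type)
{
    switch (type)
    {
        case TYP_FLOAT:  return INS_sqrtss; // scalar single
        case TYP_DOUBLE: return INS_sqrtsd; // scalar double
        default:
            assert(!"floatSqrtIns: unsupported type");
            return INS_sqrtsd;
    }
}

int main()
{
    std::printf("float -> %d, double -> %d\n", floatSqrtIns(TYP_FLOAT), floatSqrtIns(TYP_DOUBLE));
    return 0;
}
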
@@ -3873,9 +3891,7 @@ void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags fla
*/
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
-#if RELOC_SUPPORT
if (!compiler->opts.compReloc)
-#endif // RELOC_SUPPORT
{
size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
}
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 8ab3a845ba..729bece554 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -232,9 +232,10 @@ INST3( maxsd, "maxsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5F)) /
INST3( xorpd, "xorpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x57)) // XOR packed doubles
INST3( andps, "andps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x54)) // AND packed singles
INST3( andpd, "andpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x54)) // AND packed doubles
-INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of a scalar double
-INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of a packed float
-INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of a packed double
+INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of packed singles
+INST3( sqrtss, "sqrtss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x51)) // Sqrt of scalar single
+INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of packed doubles
+INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of scalar double
INST3( andnps, "andnps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x55)) // And-Not packed singles
INST3( andnpd, "andnpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x55)) // And-Not packed doubles
INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) // Or packed singles
@@ -310,7 +311,6 @@ INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE
#ifndef LEGACY_BACKEND
INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
-// Most of the following instructions should be included in the method Is4ByteAVXInstruction()
// enum name FP updmode rf wf MR MI RM
INST3( dpps, "dpps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x40)) // Packed dot product of two float vector regs
INST3( dppd, "dppd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x41)) // Packed dot product of two double vector regs
@@ -323,6 +323,14 @@ INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
+INST3( pminsb, "pminsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x38)) // packed minimum signed bytes
+INST3( pminsd, "pminsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x39)) // packed minimum 32-bit signed integers
+INST3( pminuw, "pminuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3A)) // packed minimum 16-bit unsigned integers
+INST3( pminud, "pminud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3B)) // packed minimum 32-bit unsigned integers
+INST3( pmaxsb, "pmaxsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3C)) // packed maximum signed bytes
+INST3( pmaxsd, "pmaxsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3D)) // packed maximum 32-bit signed integers
+INST3( pmaxuw, "pmaxuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3E)) // packed maximum 16-bit unsigned integers
+INST3( pmaxud, "pmaxud" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x3F)) // packed maximum 32-bit unsigned integers
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 05b154e15a..ee3f8c9117 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -172,6 +172,31 @@
#define _TARGET_ARMARCH_
#endif
+// If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
+#if defined(UNIX_AMD64_ABI)
+#if !defined(_TARGET_AMD64_)
+#error When UNIX_AMD64_ABI is defined you must define _TARGET_AMD64_ as well.
+#endif
+#endif
+
+// If the UNIX_X86_ABI is defined make sure that _TARGET_X86_ is also defined.
+#if defined(UNIX_X86_ABI)
+#if !defined(_TARGET_X86_)
+#error When UNIX_X86_ABI is defined you must define _TARGET_X86_ as well.
+#endif
+#endif
+
+#if defined(PLATFORM_UNIX)
+#define _HOST_UNIX_
+#endif
+
+// Are we generating code to target Unix? This is true if we will run on Unix (_HOST_UNIX_ is defined).
+// It's also true if we are building an altjit targeting Unix, which we determine by checking if either
+// UNIX_AMD64_ABI or UNIX_X86_ABI is defined.
+#if defined(_HOST_UNIX_) || ((defined(UNIX_AMD64_ABI) || defined(UNIX_X86_ABI)) && defined(ALT_JIT))
+#define _TARGET_UNIX_
+#endif
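
A minimal sketch, assuming the macros are supplied by the build as defined above (targetOsName is a hypothetical helper, not part of the patch): target-OS decisions key off _TARGET_UNIX_, while host-OS decisions key off _HOST_UNIX_, so a Windows-hosted altjit targeting Unix takes the Unix branch only in target checks.

#include <cstdio>

static const char* targetOsName()
{
#if defined(_TARGET_UNIX_)
    return "unix";
#else
    return "windows";
#endif
}

int main()
{
    std::printf("jit target OS: %s\n", targetOsName());
    return 0;
}
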
+
// --------------------------------------------------------------------------------
// IMAGE_FILE_MACHINE_TARGET
// --------------------------------------------------------------------------------
@@ -190,7 +215,14 @@
// Include the AMD64 unwind codes when appropriate.
#if defined(_TARGET_AMD64_)
+// We need to temporarily set PLATFORM_UNIX, if necessary, to get the Unix-specific unwind codes.
+#if defined(_TARGET_UNIX_) && !defined(_HOST_UNIX_)
+#define PLATFORM_UNIX
+#endif
#include "win64unwind.h"
+#if defined(_TARGET_UNIX_) && !defined(_HOST_UNIX_)
+#undef PLATFORM_UNIX
+#endif
#endif
// Macros for defining strongly-typed enums. Use as follows:
@@ -216,23 +248,6 @@
#define __PLACEMENT_NEW_INLINE // don't bring in the global placement new, it is easy to make a mistake
// with our new(compiler*) pattern.
-#if COR_JIT_EE_VER > 460
-#define NO_CLRCONFIG // Don't bring in the usual CLRConfig infrastructure, since the JIT uses the JIT/EE
- // interface to retrieve config values.
-
-// This is needed for contract.inl when FEATURE_STACK_PROBE is enabled.
-struct CLRConfig
-{
- static struct ConfigKey
- {
- } EXTERNAL_NO_SO_NOT_MAINLINE;
- static DWORD GetConfigValue(const ConfigKey& key)
- {
- return 0;
- }
-};
-#endif
-
#include "utilcode.h" // this defines assert as _ASSERTE
#include "host.h" // this redefines assert for the JIT to use assertAbort
#include "utils.h"
@@ -727,17 +742,6 @@ private:
/*****************************************************************************/
-#define SECURITY_CHECK 1
-#define VERIFY_IMPORTER 1
-
-/*****************************************************************************/
-
-#if !defined(RELOC_SUPPORT)
-#define RELOC_SUPPORT 1
-#endif
-
-/*****************************************************************************/
-
#include "error.h"
/*****************************************************************************/
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
index 8749b80242..bde639556b 100644
--- a/src/jit/jit.settings.targets
+++ b/src/jit/jit.settings.targets
@@ -120,6 +120,9 @@
<CppCompile Include="..\TargetArm.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\registerfp.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArmArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraarmarch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArmArch.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArm.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraarm.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArm.cpp" />
@@ -129,6 +132,9 @@
<!-- ARM64 target is always RyuJIT backend -->
<CppCompile Include="..\emitarm64.cpp" />
<CppCompile Include="..\TargetArm64.cpp" />
+ <CppCompile Include="..\LowerArmArch.cpp" />
+ <CppCompile Include="..\lsraarmarch.cpp" />
+ <CppCompile Include="..\CodeGenArmArch.cpp" />
<CppCompile Include="..\LowerArm64.cpp" />
<CppCompile Include="..\lsraarm64.cpp" />
<CppCompile Include="..\CodeGenArm64.cpp" />
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 4623fe8268..624ad1a191 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -6,6 +6,10 @@
#error CONFIG_INTEGER, CONFIG_STRING, and CONFIG_METHODSET must be defined before including this file.
#endif // !defined(CONFIG_INTEGER) || !defined(CONFIG_STRING) || !defined(CONFIG_METHODSET)
+#ifdef DEBUG
+#define OPT_CONFIG // Enable optimization level configuration.
+#endif
+
#if defined(DEBUG)
CONFIG_INTEGER(AltJitLimit, W("AltJitLimit"), 0) // Max number of functions to use altjit for (decimal)
CONFIG_INTEGER(AltJitSkipOnAssert, W("AltJitSkipOnAssert"), 0) // If AltJit hits an assert, fall back to the fallback
@@ -36,13 +40,6 @@ CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In deb
CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xff) // In debug builds, initialize the memory allocated by the nra
// with this byte.
CONFIG_INTEGER(JitDirectAlloc, W("JitDirectAlloc"), 0)
-CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
-CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
-CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagataion
-CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values
-CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
-CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
-CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
CONFIG_INTEGER(JitDoubleAlign, W("JitDoubleAlign"), 1)
CONFIG_INTEGER(JitDumpASCII, W("JitDumpASCII"), 1) // Uses only ASCII characters in tree dumps
CONFIG_INTEGER(JitDumpFgDot, W("JitDumpFgDot"), 0) // Set to non-zero to emit Dot instead of Xml Flowgraph dump
@@ -51,6 +48,9 @@ CONFIG_INTEGER(JitDumpToDebugger, W("JitDumpToDebugger"), 0) // Output JitDu
CONFIG_INTEGER(JitDumpVerboseSsa, W("JitDumpVerboseSsa"), 0) // Produce especially verbose dump output for SSA
CONFIG_INTEGER(JitDumpVerboseTrees, W("JitDumpVerboseTrees"), 0) // Enable more verbose tree dumps
CONFIG_INTEGER(JitEmitPrintRefRegs, W("JitEmitPrintRefRegs"), 0)
+CONFIG_INTEGER(JitEnableDevirtualization, W("JitEnableDevirtualization"), 1) // Enable devirtualization in importer
+CONFIG_INTEGER(JitEnableLateDevirtualization, W("JitEnableLateDevirtualization"), 1) // Enable devirtualization after
+ // inlining
CONFIG_INTEGER(JitExpensiveDebugCheckLevel, W("JitExpensiveDebugCheckLevel"), 0) // Level indicates how much checking
// beyond the default to do in debug
// builds (currently 1-2)
@@ -97,6 +97,7 @@ CONFIG_INTEGER(JitOrder, W("JitOrder"), 0)
CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0)
CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1)
CONFIG_INTEGER(JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0)
+CONFIG_INTEGER(JitPrintDevirtualizedMethods, W("JitPrintDevirtualizedMethods"), 0)
CONFIG_INTEGER(JitRequired, W("JITRequired"), -1)
CONFIG_INTEGER(JitRoundFloat, W("JITRoundFloat"), DEFAULT_ROUND_LEVEL)
CONFIG_INTEGER(JitSkipArrayBoundCheck, W("JitSkipArrayBoundCheck"), 0)
@@ -154,12 +155,10 @@ CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // D
// exception handling
CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
-CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on the method
-CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2) // Number of times to repeat opts when repeating
-CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
-CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
-CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
-CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
+CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
+CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
+CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
CONFIG_METHODSET(NgenGCDump, W("NgenGCDump"))
CONFIG_METHODSET(NgenUnwindDump, W("NgenUnwindDump")) // Dump the unwind codes for the method
CONFIG_STRING(JitDumpFg, W("JitDumpFg")) // Dumps Xml/Dot Flowgraph for specified method
@@ -219,6 +218,15 @@ CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 1)
#endif // !defined(DEBUG) && !defined(_DEBUG)
+#if !defined(JIT32_GCENCODER)
+#if defined(_TARGET_AMD64_) && defined(FEATURE_CORECLR)
+#define JitMinOptsTrackGCrefs_Default 0 // Not tracking GC refs in MinOpts is new behavior
+#else
+#define JitMinOptsTrackGCrefs_Default 1
+#endif
+CONFIG_INTEGER(JitMinOptsTrackGCrefs, W("JitMinOptsTrackGCrefs"), JitMinOptsTrackGCrefs_Default) // Track GC roots
+#endif // !defined(JIT32_GCENCODER)
+
// The following should be wrapped inside "#if MEASURE_MEM_ALLOC / #endif", but
// some files include this one without bringing in the definitions from "jit.h"
// so we don't always know what the "true" value of that flag should be. For now
@@ -236,6 +244,19 @@ CONFIG_INTEGER(JitInlineSIMDMultiplier, W("JitInlineSIMDMultiplier"), 3)
CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks
#endif // defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
+#if defined(OPT_CONFIG)
+CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
+CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
+CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform Early Value Propagation
+CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values
+CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
+CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
+CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
+
+CONFIG_METHODSET(JitOptRepeat, W("JitOptRepeat")) // Runs optimizer multiple times on the method
+CONFIG_INTEGER(JitOptRepeatCount, W("JitOptRepeatCount"), 2) // Number of times to repeat opts when repeating
+#endif // defined(OPT_CONFIG)
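
A standalone sketch of how such a knob gates an optimization phase when OPT_CONFIG is defined. readKnob is an illustrative stand-in for the CONFIG_INTEGER accessor; the real JIT reads these values through the JIT/EE interface rather than the environment.

#include <cstdio>
#include <cstdlib>

static int readKnob(const char* name, int defaultValue)
{
    // Stand-in only: consult an environment variable, falling back to the default.
    const char* value = std::getenv(name);
    return (value != nullptr) ? std::atoi(value) : defaultValue;
}

int main()
{
    bool doSsa = true;
#if defined(OPT_CONFIG)
    doSsa = (readKnob("JitDoSsa", 1) != 0);
#endif
    std::printf("SSA phase %s\n", doSsa ? "enabled" : "disabled");
    return 0;
}
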
+
CONFIG_INTEGER(JitRegisterFP, W("JitRegisterFP"), 3) // Control FP enregistration
CONFIG_INTEGER(JitTelemetry, W("JitTelemetry"), 1) // If non-zero, gather JIT telemetry data
CONFIG_INTEGER(JitVNMapSelBudget, W("JitVNMapSelBudget"), 100) // Max # of MapSelect's considered for a particular
@@ -250,6 +271,9 @@ CONFIG_STRING(AltJitExcludeAssemblies,
W("AltJitExcludeAssemblies")) // Do not use AltJit on this semicolon-delimited list of assemblies.
#endif // defined(ALT_JIT)
+CONFIG_INTEGER(JitMeasureIR, W("JitMeasureIR"), 0) // If set, measure the IR size after some phases and report it in
+ // the time log.
+
CONFIG_STRING(JitFuncInfoFile, W("JitFuncInfoLogFile")) // If set, gather JIT function info and write to this file.
CONFIG_STRING(JitTimeLogCsv, W("JitTimeLogCsv")) // If set, gather JIT throughput data and write to a CSV file. This
// mode must be used in internal retail builds.
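As a rough sketch of how a knob like JitMinOptsTrackGCrefs ends up being consumed: the preprocessor picks a per-target default (as in the hunk above), and a host configuration query can override it at run time. Everything below is illustrative only; readConfigInt and the COMPlus_ environment variable are stand-ins for the real JitConfig plumbing, not its actual API.

    #include <cstdlib>

    #if defined(_TARGET_AMD64_) && defined(FEATURE_CORECLR)
    #define JitMinOptsTrackGCrefs_Default 0 // new behavior: don't track GC refs in MinOpts
    #else
    #define JitMinOptsTrackGCrefs_Default 1
    #endif

    // Stand-in for the host config query; the real JIT reads this through JitConfig.
    static int readConfigInt(const char* name, int defaultValue)
    {
        const char* value = std::getenv(name);
        return (value != nullptr) ? std::atoi(value) : defaultValue;
    }

    static bool trackGCrefsInMinOpts()
    {
        return readConfigInt("COMPlus_JitMinOptsTrackGCrefs", JitMinOptsTrackGCrefs_Default) != 0;
    }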
diff --git a/src/jit/jitee.h b/src/jit/jitee.h
index f9bd83f5bb..7b0e4a02dc 100644
--- a/src/jit/jitee.h
+++ b/src/jit/jitee.h
@@ -78,6 +78,8 @@ public:
JIT_FLAG_USE_PINVOKE_HELPERS = 36, // The JIT should use the PINVOKE_{BEGIN,END} helpers instead of emitting inline transitions
JIT_FLAG_REVERSE_PINVOKE = 37, // The JIT should insert REVERSE_PINVOKE_{ENTER,EXIT} helpers into method prolog/epilog
JIT_FLAG_DESKTOP_QUIRKS = 38, // The JIT should generate desktop-quirk-compatible code
+ JIT_FLAG_TIER0 = 39, // This is the initial tier for tiered compilation which should generate code as quickly as possible
+ JIT_FLAG_TIER1 = 40, // This is the final tier (for now) for tiered compilation which should generate high quality code
};
// clang-format on
@@ -127,72 +129,6 @@ public:
return m_jitFlags == 0;
}
-#if COR_JIT_EE_VERSION <= 460
-
- void SetFromOldFlags(unsigned corJitFlags, unsigned corJitFlags2)
- {
- Reset();
-
-#define CONVERT_OLD_FLAG(oldf, newf) \
- if ((corJitFlags & (oldf)) != 0) \
- this->Set(JitFlags::newf);
-#define CONVERT_OLD_FLAG2(oldf, newf) \
- if ((corJitFlags2 & (oldf)) != 0) \
- this->Set(JitFlags::newf);
-
- CONVERT_OLD_FLAG(CORJIT_FLG_SPEED_OPT, JIT_FLAG_SPEED_OPT)
- CONVERT_OLD_FLAG(CORJIT_FLG_SIZE_OPT, JIT_FLAG_SIZE_OPT)
- CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_CODE, JIT_FLAG_DEBUG_CODE)
- CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_EnC, JIT_FLAG_DEBUG_EnC)
- CONVERT_OLD_FLAG(CORJIT_FLG_DEBUG_INFO, JIT_FLAG_DEBUG_INFO)
- CONVERT_OLD_FLAG(CORJIT_FLG_MIN_OPT, JIT_FLAG_MIN_OPT)
- CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_CALLS, JIT_FLAG_GCPOLL_CALLS)
- CONVERT_OLD_FLAG(CORJIT_FLG_MCJIT_BACKGROUND, JIT_FLAG_MCJIT_BACKGROUND)
-
-#if defined(_TARGET_X86_)
-
- CONVERT_OLD_FLAG(CORJIT_FLG_PINVOKE_RESTORE_ESP, JIT_FLAG_PINVOKE_RESTORE_ESP)
- CONVERT_OLD_FLAG(CORJIT_FLG_TARGET_P4, JIT_FLAG_TARGET_P4)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_FCOMI, JIT_FLAG_USE_FCOMI)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_CMOV, JIT_FLAG_USE_CMOV)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE2, JIT_FLAG_USE_SSE2)
-
-#elif defined(_TARGET_AMD64_)
-
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_SSE3_4, JIT_FLAG_USE_SSE3_4)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX, JIT_FLAG_USE_AVX)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX2, JIT_FLAG_USE_AVX2)
- CONVERT_OLD_FLAG(CORJIT_FLG_USE_AVX_512, JIT_FLAG_USE_AVX_512)
- CONVERT_OLD_FLAG(CORJIT_FLG_FEATURE_SIMD, JIT_FLAG_FEATURE_SIMD)
-
-#endif // !defined(_TARGET_X86_) && !defined(_TARGET_AMD64_)
-
- CONVERT_OLD_FLAG(CORJIT_FLG_MAKEFINALCODE, JIT_FLAG_MAKEFINALCODE)
- CONVERT_OLD_FLAG(CORJIT_FLG_READYTORUN, JIT_FLAG_READYTORUN)
- CONVERT_OLD_FLAG(CORJIT_FLG_PROF_ENTERLEAVE, JIT_FLAG_PROF_ENTERLEAVE)
- CONVERT_OLD_FLAG(CORJIT_FLG_PROF_REJIT_NOPS, JIT_FLAG_PROF_REJIT_NOPS)
- CONVERT_OLD_FLAG(CORJIT_FLG_PROF_NO_PINVOKE_INLINE, JIT_FLAG_PROF_NO_PINVOKE_INLINE)
- CONVERT_OLD_FLAG(CORJIT_FLG_SKIP_VERIFICATION, JIT_FLAG_SKIP_VERIFICATION)
- CONVERT_OLD_FLAG(CORJIT_FLG_PREJIT, JIT_FLAG_PREJIT)
- CONVERT_OLD_FLAG(CORJIT_FLG_RELOC, JIT_FLAG_RELOC)
- CONVERT_OLD_FLAG(CORJIT_FLG_IMPORT_ONLY, JIT_FLAG_IMPORT_ONLY)
- CONVERT_OLD_FLAG(CORJIT_FLG_IL_STUB, JIT_FLAG_IL_STUB)
- CONVERT_OLD_FLAG(CORJIT_FLG_PROCSPLIT, JIT_FLAG_PROCSPLIT)
- CONVERT_OLD_FLAG(CORJIT_FLG_BBINSTR, JIT_FLAG_BBINSTR)
- CONVERT_OLD_FLAG(CORJIT_FLG_BBOPT, JIT_FLAG_BBOPT)
- CONVERT_OLD_FLAG(CORJIT_FLG_FRAMED, JIT_FLAG_FRAMED)
- CONVERT_OLD_FLAG(CORJIT_FLG_ALIGN_LOOPS, JIT_FLAG_ALIGN_LOOPS)
- CONVERT_OLD_FLAG(CORJIT_FLG_PUBLISH_SECRET_PARAM, JIT_FLAG_PUBLISH_SECRET_PARAM)
- CONVERT_OLD_FLAG(CORJIT_FLG_GCPOLL_INLINE, JIT_FLAG_GCPOLL_INLINE)
-
- CONVERT_OLD_FLAG2(CORJIT_FLG2_SAMPLING_JIT_BACKGROUND, JIT_FLAG_SAMPLING_JIT_BACKGROUND)
-
-#undef CONVERT_OLD_FLAG
-#undef CONVERT_OLD_FLAG2
- }
-
-#else // COR_JIT_EE_VERSION > 460
-
void SetFromFlags(CORJIT_FLAGS flags)
{
// We don't want to have to check every one, so we assume it is exactly the same values as the JitFlag
@@ -253,12 +189,12 @@ public:
FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_USE_PINVOKE_HELPERS, JIT_FLAG_USE_PINVOKE_HELPERS);
FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_REVERSE_PINVOKE, JIT_FLAG_REVERSE_PINVOKE);
FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_DESKTOP_QUIRKS, JIT_FLAG_DESKTOP_QUIRKS);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TIER0, JIT_FLAG_TIER0);
+ FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TIER1, JIT_FLAG_TIER1);
#undef FLAGS_EQUAL
}
-#endif // COR_JIT_EE_VERSION > 460
-
private:
unsigned __int64 m_jitFlags;
};
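Because m_jitFlags is a single unsigned __int64, each JIT_FLAG_* enumerator (including the new JIT_FLAG_TIER0 = 39 and JIT_FLAG_TIER1 = 40) is simply a bit index, so up to 64 flags fit in one word. A minimal standalone sketch of that representation, assuming Set/IsSet/Reset semantics but not copying the real class:

    #include <cassert>
    #include <cstdint>

    class JitFlagsSketch
    {
    public:
        void Reset() { m_bits = 0; }

        void Set(unsigned flag)
        {
            assert(flag < 64); // a flag is a bit position in the 64-bit word
            m_bits |= 1ULL << flag;
        }

        bool IsSet(unsigned flag) const
        {
            assert(flag < 64);
            return (m_bits & (1ULL << flag)) != 0;
        }

    private:
        uint64_t m_bits = 0;
    };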
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index 3f8d8afe88..7b17b84204 100644
--- a/src/jit/jitgcinfo.h
+++ b/src/jit/jitgcinfo.h
@@ -295,7 +295,11 @@ public:
// references, building up mappings from tuples of <reg/offset X byref/pinning> to the corresponding
// slot id (in the two member fields declared above). In the "do work" mode, we use these slot ids to
// actually declare live ranges to the encoder.
- void gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder, unsigned codeSize, unsigned prologSize, MakeRegPtrMode mode);
+ void gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
+ unsigned codeSize,
+ unsigned prologSize,
+ MakeRegPtrMode mode,
+ unsigned* callCntRef);
#endif
#ifdef JIT32_GCENCODER
diff --git a/src/jit/jitpch.h b/src/jit/jitpch.h
index 2e69e79208..1fe8f27302 100644
--- a/src/jit/jitpch.h
+++ b/src/jit/jitpch.h
@@ -15,10 +15,6 @@
#include <cstdlib>
#include <intrin.h>
-#if COR_JIT_EE_VERSION <= 460
-#include "corjithost.h"
-#include "jithost.h"
-#endif
#include "jitconfig.h"
#include "jit.h"
#include "iallocator.h"
diff --git a/src/jit/jitstd/type_traits.h b/src/jit/jitstd/type_traits.h
index 1e853e8cca..f0f8518c40 100644
--- a/src/jit/jitstd/type_traits.h
+++ b/src/jit/jitstd/type_traits.h
@@ -178,14 +178,15 @@ struct make_unsigned<int>
typedef unsigned int type;
};
-#ifndef PLATFORM_UNIX
+#ifndef _HOST_UNIX_
template<>
struct make_unsigned<long>
{
typedef unsigned long type;
};
-#endif // PLATFORM_UNIX
+
+#endif // !_HOST_UNIX_
template<>
struct make_unsigned<__int64>
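For context, a self-contained sketch of the make_unsigned specialization pattern being guarded above. The guard name below is illustrative; whether a distinct 'long' specialization is wanted presumably depends on the host data model (on LP64 hosts 'long' is 64 bits), which is what the _HOST_UNIX_ check expresses.

    #include <type_traits>

    template <typename T>
    struct my_make_unsigned; // primary template left undefined on purpose

    template <> struct my_make_unsigned<int>       { typedef unsigned int type; };
    #ifndef HOST_IS_UNIX // illustrative stand-in for _HOST_UNIX_
    template <> struct my_make_unsigned<long>      { typedef unsigned long type; };
    #endif
    template <> struct my_make_unsigned<long long> { typedef unsigned long long type; };

    static_assert(std::is_same<my_make_unsigned<int>::type, unsigned int>::value, "sanity check");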
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index b4e4cc6e55..5bcb1c8f77 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -38,6 +38,8 @@ void Compiler::lvaInit()
lvaRefCountingStarted = false;
lvaLocalVarRefCounted = false;
+ lvaGenericsContextUseCount = 0;
+
lvaSortAgain = false; // false: We don't need to call lvaSortOnly()
lvaTrackedFixed = false; // false: We can still add new tracked variables
@@ -50,6 +52,7 @@ void Compiler::lvaInit()
#if FEATURE_FIXED_OUT_ARGS
lvaPInvokeFrameRegSaveVar = BAD_VAR_NUM;
lvaOutgoingArgSpaceVar = BAD_VAR_NUM;
+ lvaOutgoingArgSpaceSize = PhasedVar<unsigned>();
#endif // FEATURE_FIXED_OUT_ARGS
#ifdef _TARGET_ARM_
lvaPromotedStructAssemblyScratchVar = BAD_VAR_NUM;
@@ -246,10 +249,17 @@ void Compiler::lvaInitTypeRef()
CORINFO_CLASS_HANDLE typeHnd;
CorInfoTypeWithMod corInfoType =
info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd);
+
lvaInitVarDsc(varDsc, varNum, strip(corInfoType), typeHnd, localsSig, &info.compMethodInfo->locals);
varDsc->lvPinned = ((corInfoType & CORINFO_TYPE_MOD_PINNED) != 0);
varDsc->lvOnFrame = true; // The final home for this local variable might be our local stack frame
+
+ if (strip(corInfoType) == CORINFO_TYPE_CLASS)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->locals, localsSig);
+ lvaSetClass(varNum, clsHnd);
+ }
}
if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
@@ -395,6 +405,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
else
{
varDsc->lvType = TYP_REF;
+ lvaSetClass(varDscInfo->varNum, info.compClassHnd);
}
if (tiVerificationNeeded)
@@ -549,6 +560,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args);
+ if (strip(corInfoType) == CORINFO_TYPE_CLASS)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->args, argLst);
+ lvaSetClass(varDscInfo->varNum, clsHnd);
+ }
+
// For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers
var_types argType = mangleVarArgsType(varDsc->TypeGet());
var_types origArgType = argType;
@@ -660,11 +677,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
codeGen->regSet.rsMaskPreSpillRegArg |= regMask;
}
}
- else
- {
- varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
- }
-
#else // !_TARGET_ARM_
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
@@ -706,13 +718,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
}
}
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // !_TARGET_ARM_
// The final home for this incoming register might be our local stack frame.
// For System V platforms the final home will always be on the local stack frame.
varDsc->lvOnFrame = true;
-#endif // !_TARGET_ARM_
-
bool canPassArgInRegisters = false;
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
@@ -1522,15 +1533,8 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
pFieldInfo->fldOffset = (BYTE)fldOffset;
pFieldInfo->fldOrdinal = ordinal;
CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
- var_types varType = JITtype2varType(corType);
- pFieldInfo->fldType = varType;
- unsigned size = genTypeSize(varType);
- pFieldInfo->fldSize = size;
-
- if (varTypeIsGC(varType))
- {
- containsGCpointers = true;
- }
+ pFieldInfo->fldType = JITtype2varType(corType);
+ pFieldInfo->fldSize = genTypeSize(pFieldInfo->fldType);
#ifdef FEATURE_SIMD
// Check to see if this is a SIMD type.
@@ -1542,8 +1546,7 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(pFieldInfo->fldTypeHnd, &simdSize);
if (simdBaseType != TYP_UNKNOWN)
{
- varType = getSIMDTypeForSize(simdSize);
- pFieldInfo->fldType = varType;
+ pFieldInfo->fldType = getSIMDTypeForSize(simdSize);
pFieldInfo->fldSize = simdSize;
}
}
@@ -1551,8 +1554,60 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
if (pFieldInfo->fldSize == 0)
{
- // Non-primitive struct field. Don't promote.
- return;
+ // Size of TYP_BLK, TYP_FUNC, TYP_VOID and TYP_STRUCT is zero.
+ // Early out if field type is other than TYP_STRUCT.
+ // This is a defensive check as we don't expect a struct to have
+ // fields of TYP_BLK, TYP_FUNC or TYP_VOID.
+ if (pFieldInfo->fldType != TYP_STRUCT)
+ {
+ return;
+ }
+
+ // Non-primitive struct field.
+        // Try to promote structs with a single field of a scalar type aligned at its
+        // natural boundary.
+
+        // Do not promote if the struct field in turn has more than one field.
+ if (info.compCompHnd->getClassNumInstanceFields(pFieldInfo->fldTypeHnd) != 1)
+ {
+ return;
+ }
+
+ // Do not promote if the single field is not aligned at its natural boundary within
+ // the struct field.
+ CORINFO_FIELD_HANDLE fHnd = info.compCompHnd->getFieldInClass(pFieldInfo->fldTypeHnd, 0);
+ unsigned fOffset = info.compCompHnd->getFieldOffset(fHnd);
+ if (fOffset != 0)
+ {
+ return;
+ }
+
+ CORINFO_CLASS_HANDLE cHnd;
+ CorInfoType fieldCorType = info.compCompHnd->getFieldType(fHnd, &cHnd);
+ var_types fieldVarType = JITtype2varType(fieldCorType);
+ unsigned fieldSize = genTypeSize(fieldVarType);
+
+ // Do not promote if either not a primitive type or size equal to ptr size on
+ // target or a struct containing a single floating-point field.
+ //
+ // TODO-PERF: Structs containing a single floating-point field on Amd64
+        // need to be passed in integer registers. Right now LSRA doesn't support
+ // passing of floating-point LCL_VARS in integer registers. Enabling promotion
+ // of such structs results in an assert in lsra right now.
+ //
+ // TODO-PERF: Right now promotion is confined to struct containing a ptr sized
+ // field (int/uint/ref/byref on 32-bits and long/ulong/ref/byref on 64-bits).
+ // Though this would serve the purpose of promoting Span<T> containing ByReference<T>,
+ // this can be extended to other primitive types as long as they are aligned at their
+ // natural boundary.
+ if (fieldSize == 0 || fieldSize != TARGET_POINTER_SIZE || varTypeIsFloating(fieldVarType))
+ {
+ return;
+ }
+
+ // Retype the field as the type of the single field of the struct
+ pFieldInfo->fldType = fieldVarType;
+ pFieldInfo->fldSize = fieldSize;
}
if ((pFieldInfo->fldOffset % pFieldInfo->fldSize) != 0)
@@ -1563,6 +1618,11 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
return;
}
+ if (varTypeIsGC(pFieldInfo->fldType))
+ {
+ containsGCpointers = true;
+ }
+
// The end offset for this field should never be larger than our structSize.
noway_assert(fldOffset + pFieldInfo->fldSize <= structSize);
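The shapes the new single-field promotion code targets look roughly like the following; the names are examples only (the TODO above cites Span<T> containing ByReference<T> as the motivating case):

    struct ByRefLike // e.g. ByReference<T>: exactly one pointer-sized field at offset 0
    {
        void* pointer;
    };

    struct SpanLike // e.g. Span<T>
    {
        ByRefLike data; // struct-typed field: now retyped as a pointer-sized scalar and promoted
        int       length;
    };
    // Not retyped: fields whose struct has more than one field, a field not at offset 0,
    // a field that is not pointer-sized, or a single floating-point field.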
@@ -1657,7 +1717,6 @@ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* S
noway_assert(varTypeIsStruct(varDsc));
noway_assert(!varDsc->lvPromoted); // Don't ask again :)
-#ifdef FEATURE_SIMD
// If this lclVar is used in a SIMD intrinsic, then we don't want to struct promote it.
// Note, however, that SIMD lclVars that are NOT used in a SIMD intrinsic may be
// profitably promoted.
@@ -1667,24 +1726,134 @@ void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* S
return;
}
-#endif
-
- // TODO-PERF - Allow struct promotion for HFA register arguments
-
// Explicitly check for HFA reg args and reject them for promotion here.
// Promoting HFA args will fire an assert in lvaAssignFrameOffsets
// when the HFA reg arg is struct promoted.
//
+ // TODO-PERF - Allow struct promotion for HFA register arguments
if (varDsc->lvIsHfaRegArg())
{
StructPromotionInfo->canPromote = false;
return;
}
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+ if (varDsc->lvIsMultiRegArg)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+#endif
+
+ if (varDsc->lvIsMultiRegRet)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+
CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
lvaCanPromoteStructType(typeHnd, StructPromotionInfo, true);
}
+//--------------------------------------------------------------------------------------------
+// lvaShouldPromoteStructVar - Should a struct var be promoted if it can be promoted?
+// This routine mainly performs profitability checks. Right now it also has
+// some correctness checks due to limitations of down-stream phases.
+//
+// Arguments:
+// lclNum - Struct local number
+// structPromotionInfo - In Parameter; struct promotion information
+//
+// Returns:
+// true if the struct should be promoted
+bool Compiler::lvaShouldPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* structPromotionInfo)
+{
+ assert(lclNum < lvaCount);
+ assert(structPromotionInfo->canPromote);
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ assert(varTypeIsStruct(varDsc));
+
+ bool shouldPromote = true;
+
+ // We *can* promote; *should* we promote?
+ // We should only do so if promotion has potential savings. One source of savings
+ // is if a field of the struct is accessed, since this access will be turned into
+ // an access of the corresponding promoted field variable. Even if there are no
+ // field accesses, but only block-level operations on the whole struct, if the struct
+ // has only one or two fields, then doing those block operations field-wise is probably faster
+ // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
+ // Struct promotion also provides the following benefits: reduce stack frame size,
+ // reduce the need for zero init of stack frame and fine grained constant/copy prop.
+ // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
+ // So if no fields are accessed independently, and there are four or more fields,
+ // then do not promote.
+ //
+ // TODO: Ideally we would want to consider the impact of whether the struct is
+ // passed as a parameter or assigned the return value of a call. Because once promoted,
+    // struct copying is done by field-by-field assignment instead of a more efficient
+ // rep.stos or xmm reg based copy.
+ if (structPromotionInfo->fieldCnt > 3 && !varDsc->lvFieldAccessed)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
+ structPromotionInfo->fieldCnt, varDsc->lvFieldAccessed);
+ shouldPromote = false;
+ }
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // TODO-PERF - Only do this when the LclVar is used in an argument context
+ // TODO-ARM64 - HFA support should also eliminate the need for this.
+ // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
+ //
+    // For now we don't promote structs with a single float field.
+    // Promoting it can cause us to shuffle it back and forth between the int and
+    // the float regs when it is used as an argument, which is very expensive for XARCH.
+ //
+ else if ((structPromotionInfo->fieldCnt == 1) && varTypeIsFloating(structPromotionInfo->fields[0].fldType))
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
+ "single float field.\n",
+ lclNum, structPromotionInfo->fieldCnt);
+ shouldPromote = false;
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+ else if (varDsc->lvIsParam)
+ {
+#if FEATURE_MULTIREG_STRUCT_PROMOTE
+ // Is this a variable holding a value with exactly two fields passed in
+ // multiple registers?
+ if ((structPromotionInfo->fieldCnt != 2) && lvaIsMultiregStruct(varDsc))
+ {
+ JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n", lclNum);
+ shouldPromote = false;
+ }
+ else
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+
+ // TODO-PERF - Implement struct promotion for incoming multireg structs
+ // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
+
+ if (structPromotionInfo->fieldCnt != 1)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
+ "%d.\n",
+ lclNum, structPromotionInfo->fieldCnt);
+ shouldPromote = false;
+ }
+ }
+
+ //
+    // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
+    // the incoming register into the stack frame slot.
+    // In that case, we would like to avoid promotion.
+ // However we haven't yet computed the lvRefCnt values so we can't do that.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+ return shouldPromote;
+}
+
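Condensed outside the compiler, and ignoring the target-specific #if guards around the single-float check, the profitability rules above amount to roughly the sketch below; the struct and helper are illustrative, not the real lvaStructPromotionInfo:

    struct PromoInfoSketch
    {
        unsigned fieldCnt;
        bool     singleFieldIsFloat;
        bool     isParam;
        bool     fieldAccessed;
    };

    static bool shouldPromoteSketch(const PromoInfoSketch& p)
    {
        if ((p.fieldCnt > 3) && !p.fieldAccessed)
        {
            return false; // 4+ fields, none accessed independently: block ops are cheaper
        }
        if ((p.fieldCnt == 1) && p.singleFieldIsFloat)
        {
            return false; // single-float struct: would shuffle between int and float regs as an arg
        }
        if (p.isParam && (p.fieldCnt != 1))
        {
            return false; // incoming multireg struct promotion not implemented yet
        }
        return true;
    }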
/*****************************************************************************
* Promote a struct type local */
@@ -2123,6 +2292,199 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
}
}
+//------------------------------------------------------------------------
+// lvaSetClass: set class information for a local var.
+//
+// Arguments:
+// varNum -- number of the variable
+// clsHnd -- class handle to use in set or update
+// isExact -- true if class is known exactly
+//
+// Notes:
+// varNum must not already have a ref class handle.
+
+void Compiler::lvaSetClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact)
+{
+ noway_assert(varNum < lvaCount);
+
+ // If we are just importing, we cannot reliably track local ref types,
+ // since the jit maps CORINFO_TYPE_VAR to TYP_REF.
+ if (compIsForImportOnly())
+ {
+ return;
+ }
+
+ // Else we should have a type handle.
+ assert(clsHnd != nullptr);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ assert(varDsc->lvType == TYP_REF);
+
+ // We shoud not have any ref type information for this var.
+    // We should not have any ref type information for this var.
+ assert(!varDsc->lvClassIsExact);
+
+ JITDUMP("\nlvaSetClass: setting class for V%02i to (%p) %s %s\n", varNum, clsHnd,
+ info.compCompHnd->getClassName(clsHnd), isExact ? " [exact]" : "");
+
+ varDsc->lvClassHnd = clsHnd;
+ varDsc->lvClassIsExact = isExact;
+}
+
+//------------------------------------------------------------------------
+// lvaSetClass: set class information for a local var from a tree or stack type
+//
+// Arguments:
+// varNum -- number of the variable. Must be a single def local
+// tree -- tree establishing the variable's value
+// stackHnd -- handle for the type from the evaluation stack
+//
+// Notes:
+// Preferentially uses the tree's type, when available. Since not all
+// tree kinds can track ref types, the stack type is used as a
+// fallback.
+
+void Compiler::lvaSetClass(unsigned varNum, GenTreePtr tree, CORINFO_CLASS_HANDLE stackHnd)
+{
+ bool isExact = false;
+ bool isNonNull = false;
+ CORINFO_CLASS_HANDLE clsHnd = gtGetClassHandle(tree, &isExact, &isNonNull);
+
+ if (clsHnd != nullptr)
+ {
+ lvaSetClass(varNum, clsHnd, isExact);
+ }
+ else if (stackHnd != nullptr)
+ {
+ lvaSetClass(varNum, stackHnd);
+ }
+}
+
+//------------------------------------------------------------------------
+// lvaUpdateClass: update class information for a local var.
+//
+// Arguments:
+// varNum -- number of the variable
+// clsHnd -- class handle to use in set or update
+// isExact -- true if class is known exactly
+//
+// Notes:
+//
+// This method models the type update rule for an assignment.
+//
+// Updates currently should only happen for single-def user args or
+// locals, when we are processing the expression actually being
+// used to initialize the local (or inlined arg). The update will
+// change the local from the declared type to the type of the
+// initial value.
+//
+// These updates should always *improve* what we know about the
+// type, that is making an inexact type exact, or changing a type
+// to some subtype. However the jit lacks precise type information
+// for shared code, so ensuring this is so is currently not
+// possible.
+
+void Compiler::lvaUpdateClass(unsigned varNum, CORINFO_CLASS_HANDLE clsHnd, bool isExact)
+{
+ noway_assert(varNum < lvaCount);
+
+ // If we are just importing, we cannot reliably track local ref types,
+ // since the jit maps CORINFO_TYPE_VAR to TYP_REF.
+ if (compIsForImportOnly())
+ {
+ return;
+ }
+
+ // Else we should have a class handle to consider
+ assert(clsHnd != nullptr);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ assert(varDsc->lvType == TYP_REF);
+
+ // We should already have a class
+ assert(varDsc->lvClassHnd != nullptr);
+
+#if defined(DEBUG)
+
+ // In general we only expect one update per local var. However if
+ // a block is re-imported and that block has the only STLOC for
+ // the var, we may see multiple updates. All subsequent updates
+ // should agree on the type, since reimportation is triggered by
+ // type mismatches for things other than ref types.
+ if (varDsc->lvClassInfoUpdated)
+ {
+ assert(varDsc->lvClassHnd == clsHnd);
+ assert(varDsc->lvClassIsExact == isExact);
+ }
+
+ // This counts as an update, even if nothing changes.
+ varDsc->lvClassInfoUpdated = true;
+
+#endif // defined(DEBUG)
+
+ // If previous type was exact, there is nothing to update. Would
+ // like to verify new type is compatible but can't do this yet.
+ if (varDsc->lvClassIsExact)
+ {
+ return;
+ }
+
+ // Are we updating the type?
+ if (varDsc->lvClassHnd != clsHnd)
+ {
+ JITDUMP("\nlvaUpdateClass: Updating class for V%02i from (%p) %s to (%p) %s %s\n", varNum, varDsc->lvClassHnd,
+ info.compCompHnd->getClassName(varDsc->lvClassHnd), clsHnd, info.compCompHnd->getClassName(clsHnd),
+ isExact ? " [exact]" : "");
+
+ varDsc->lvClassHnd = clsHnd;
+ varDsc->lvClassIsExact = isExact;
+ return;
+ }
+
+ // Class info matched. Are we updating exactness?
+ if (isExact)
+ {
+ JITDUMP("\nlvaUpdateClass: Updating class for V%02i (%p) %s to be exact\n", varNum, varDsc->lvClassHnd,
+ info.compCompHnd->getClassName(varDsc->lvClassHnd));
+
+ varDsc->lvClassIsExact = isExact;
+ return;
+ }
+
+ // Else we have the same handle and (in)exactness as before. Do nothing.
+ return;
+}
+
+//------------------------------------------------------------------------
+// lvaUpdateClass: Update class information for a local var from a tree
+// or stack type
+//
+// Arguments:
+// varNum -- number of the variable. Must be a single def local
+// tree -- tree establishing the variable's value
+// stackHnd -- handle for the type from the evaluation stack
+//
+// Notes:
+// Preferentially uses the tree's type, when available. Since not all
+// tree kinds can track ref types, the stack type is used as a
+// fallback.
+
+void Compiler::lvaUpdateClass(unsigned varNum, GenTreePtr tree, CORINFO_CLASS_HANDLE stackHnd)
+{
+ bool isExact = false;
+ bool isNonNull = false;
+ CORINFO_CLASS_HANDLE clsHnd = gtGetClassHandle(tree, &isExact, &isNonNull);
+
+ if (clsHnd != nullptr)
+ {
+ lvaUpdateClass(varNum, clsHnd, isExact);
+ }
+ else if (stackHnd != nullptr)
+ {
+ lvaUpdateClass(varNum, stackHnd);
+ }
+}
+
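The assignment-update rule documented above can be restated as a small standalone sketch; the handle type and function are placeholders rather than the real CORINFO_CLASS_HANDLE API:

    struct ClassInfoSketch
    {
        const void* handle  = nullptr; // placeholder for CORINFO_CLASS_HANDLE
        bool        isExact = false;
    };

    static void updateClassSketch(ClassInfoSketch& info, const void* newHandle, bool newIsExact)
    {
        if (info.isExact)
        {
            return; // already exact: nothing can improve
        }
        if (info.handle != newHandle)
        {
            info.handle  = newHandle; // the assignment changes the tracked type
            info.isExact = newIsExact;
        }
        else if (newIsExact)
        {
            info.isExact = true; // same class as before, but now known exactly
        }
    }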
/*****************************************************************************
* Returns the array of BYTEs containing the GC layout information
*/
@@ -2134,9 +2496,14 @@ BYTE* Compiler::lvaGetGcLayout(unsigned varNum)
return lvaTable[varNum].lvGcLayout;
}
-/*****************************************************************************
- * Return the number of bytes needed for a local variable
- */
+//------------------------------------------------------------------------
+// lvaLclSize: returns size of a local variable, in bytes
+//
+// Arguments:
+// varNum -- variable to query
+//
+// Returns:
+// Number of bytes needed on the frame for such a local.
unsigned Compiler::lvaLclSize(unsigned varNum)
{
@@ -2152,10 +2519,8 @@ unsigned Compiler::lvaLclSize(unsigned varNum)
case TYP_LCLBLK:
#if FEATURE_FIXED_OUT_ARGS
- noway_assert(lvaOutgoingArgSpaceSize >= 0);
noway_assert(varNum == lvaOutgoingArgSpaceVar);
return lvaOutgoingArgSpaceSize;
-
#else // FEATURE_FIXED_OUT_ARGS
assert(!"Unknown size");
NO_WAY("Target doesn't support TYP_LCLBLK");
@@ -2217,8 +2582,41 @@ unsigned Compiler::lvaLclExactSize(unsigned varNum)
return genTypeSize(varType);
}
+// getCalledCount -- get the value used to normalize weights for this method
+// if we don't have profile data then getCalledCount will return BB_UNITY_WEIGHT (100)
+// otherwise it returns the number of times that profile data says the method was called.
+//
+BasicBlock::weight_t BasicBlock::getCalledCount(Compiler* comp)
+{
+ // when we don't have profile data then fgCalledCount will be BB_UNITY_WEIGHT (100)
+ BasicBlock::weight_t calledCount = comp->fgCalledCount;
+
+    // If we haven't yet reached the place where we set up fgCalledCount, it could still be zero,
+ // so return a reasonable value to use until we set it.
+ //
+ if (calledCount == 0)
+ {
+ if (comp->fgIsUsingProfileWeights())
+ {
+ // When we use profile data block counts we have exact counts,
+ // not multiples of BB_UNITY_WEIGHT (100)
+ calledCount = 1;
+ }
+ else
+ {
+ calledCount = comp->fgFirstBB->bbWeight;
+
+ if (calledCount == 0)
+ {
+ calledCount = BB_UNITY_WEIGHT;
+ }
+ }
+ }
+ return calledCount;
+}
+
// getBBWeight -- get the normalized weight of this block
-unsigned BasicBlock::getBBWeight(Compiler* comp)
+BasicBlock::weight_t BasicBlock::getBBWeight(Compiler* comp)
{
if (this->bbWeight == 0)
{
@@ -2226,22 +2624,50 @@ unsigned BasicBlock::getBBWeight(Compiler* comp)
}
else
{
- unsigned calledWeight = comp->fgCalledWeight;
- if (calledWeight == 0)
- {
- calledWeight = comp->fgFirstBB->bbWeight;
- if (calledWeight == 0)
- {
- calledWeight = BB_UNITY_WEIGHT;
- }
- }
+ weight_t calledCount = getCalledCount(comp);
+
+ // Normalize the bbWeights by multiplying by BB_UNITY_WEIGHT and dividing by the calledCount.
+ //
+ // 1. For methods that do not have IBC data the called weight will always be 100 (BB_UNITY_WEIGHT)
+ // and the entry point bbWeight value is almost always 100 (BB_UNITY_WEIGHT)
+ // 2. For methods that do have IBC data the called weight is the actual number of calls
+ // from the IBC data and the entry point bbWeight value is almost always the actual
+ // number of calls from the IBC data.
+ //
+ // "almost always" - except for the rare case where a loop backedge jumps to BB01
+ //
+ // We also perform a rounding operation by adding half of the 'calledCount' before performing
+ // the division.
+ //
+ // Thus for both cases we will return 100 (BB_UNITY_WEIGHT) for the entry point BasicBlock
+ //
+        // Note that with BB_UNITY_WEIGHT at 100, values between 1 and 99 represent decimal fractions.
+ // (i.e. 33 represents 33% and 75 represents 75%, and values greater than 100 require
+ // some kind of loop backedge)
+ //
+
if (this->bbWeight < (BB_MAX_WEIGHT / BB_UNITY_WEIGHT))
{
- return max(1, (((this->bbWeight * BB_UNITY_WEIGHT) + (calledWeight / 2)) / calledWeight));
+ // Calculate the result using unsigned arithmetic
+ weight_t result = ((this->bbWeight * BB_UNITY_WEIGHT) + (calledCount / 2)) / calledCount;
+
+ // We don't allow a value of zero, as that would imply rarely run
+ return max(1, result);
}
else
{
- return (unsigned)((((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledWeight) + 0.5);
+ // Calculate the full result using floating point
+ double fullResult = ((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledCount;
+
+ if (fullResult < (double)BB_MAX_WEIGHT)
+ {
+ // Add 0.5 and truncate to unsigned
+ return (weight_t)(fullResult + 0.5);
+ }
+ else
+ {
+ return BB_MAX_WEIGHT;
+ }
}
}
}
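The normalization performed by getBBWeight is weight = bbWeight * BB_UNITY_WEIGHT / calledCount, rounded and kept in [1, BB_MAX_WEIGHT]. A standalone sketch with BB_UNITY_WEIGHT taken from the comments (100); BB_MAX_WEIGHT's value is not shown in this hunk, so a placeholder is used:

    #include <algorithm>
    #include <cstdint>

    static const uint32_t UNITY_WEIGHT = 100;        // BB_UNITY_WEIGHT per the comments above
    static const uint32_t MAX_WEIGHT   = 0xFFFFFFFF; // placeholder for BB_MAX_WEIGHT

    static uint32_t normalizedWeight(uint32_t bbWeight, uint32_t calledCount)
    {
        if (bbWeight == 0)
        {
            return 0; // rarely run
        }
        if (bbWeight < (MAX_WEIGHT / UNITY_WEIGHT))
        {
            // Integer path with rounding, e.g. bbWeight = 50, calledCount = 100 -> 50 (i.e. 50%).
            return std::max<uint32_t>(1, ((bbWeight * UNITY_WEIGHT) + (calledCount / 2)) / calledCount);
        }
        // Overflow-safe path using floating point, clamped to the maximum weight.
        double fullResult = ((double)bbWeight * (double)UNITY_WEIGHT) / (double)calledCount;
        return (fullResult < (double)MAX_WEIGHT) ? (uint32_t)(fullResult + 0.5) : MAX_WEIGHT;
    }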
@@ -2522,7 +2948,7 @@ int __cdecl Compiler::RefCntCmp(const void* op1, const void* op2)
}
if (varTypeIsGC(dsc2->TypeGet()))
{
- weight1 += BB_UNITY_WEIGHT / 2;
+ weight2 += BB_UNITY_WEIGHT / 2;
}
if (dsc2->lvRegister)
@@ -2859,6 +3285,10 @@ void Compiler::lvaSortByRefCount()
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_PinningRef));
#endif
}
+ else if (opts.MinOpts() && !JitConfig.JitMinOptsTrackGCrefs() && varTypeIsGC(varDsc->TypeGet()))
+ {
+ varDsc->lvTracked = 0;
+ }
// Are we not optimizing and we have exception handlers?
// if so mark all args and locals "do not enregister".
@@ -3192,23 +3622,9 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
}
#if ASSERTION_PROP
- /* Exclude the normal entry block */
- if (fgDomsComputed && (lvaMarkRefsCurBlock->bbNum != 1) && lvaMarkRefsCurBlock->bbIDom != nullptr)
+ if (fgDomsComputed && IsDominatedByExceptionalEntry(lvaMarkRefsCurBlock))
{
- // If any entry block except the normal entry block dominates the block, then mark the local with the
- // lvVolatileHint flag.
-
- if (BlockSetOps::MayBeUninit(lvaMarkRefsCurBlock->bbDoms))
- {
- // Lazy init (If a block is not dominated by any other block, we'll redo this every time, but it'll be fast)
- BlockSetOps::AssignNoCopy(this, lvaMarkRefsCurBlock->bbDoms, fgGetDominatorSet(lvaMarkRefsCurBlock));
- BlockSetOps::RemoveElemD(this, lvaMarkRefsCurBlock->bbDoms, fgFirstBB->bbNum);
- }
- assert(fgEnterBlksSetValid);
- if (!BlockSetOps::IsEmptyIntersection(this, lvaMarkRefsCurBlock->bbDoms, fgEnterBlks))
- {
- varDsc->lvVolatileHint = 1;
- }
+ SetVolatileHint(varDsc);
}
/* Record if the variable has a single def or not */
@@ -3293,6 +3709,29 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
#endif
}
+//------------------------------------------------------------------------
+// IsDominatedByExceptionalEntry: Check whether the block is dominated by an exceptional entry block.
+//
+// Arguments:
+//    block - the block to check.
+//
+bool Compiler::IsDominatedByExceptionalEntry(BasicBlock* block)
+{
+ assert(fgDomsComputed);
+ return block->IsDominatedByExceptionalEntryFlag();
+}
+
+//------------------------------------------------------------------------
+// SetVolatileHint: Set a local var's volatile hint.
+//
+// Arguments:
+// varDsc - the local variable that needs the hint.
+//
+void Compiler::SetVolatileHint(LclVarDsc* varDsc)
+{
+ varDsc->lvVolatileHint = true;
+}
+
/*****************************************************************************
*
* Helper passed to Compiler::fgWalkTreePre() to do variable ref marking.
@@ -3381,7 +3820,7 @@ void Compiler::lvaMarkLocalVars()
}
}
- lvaAllocOutgoingArgSpace();
+ lvaAllocOutgoingArgSpaceVar();
#if !FEATURE_EH_FUNCLETS
@@ -3516,7 +3955,7 @@ void Compiler::lvaMarkLocalVars()
lvaSortByRefCount();
}
-void Compiler::lvaAllocOutgoingArgSpace()
+void Compiler::lvaAllocOutgoingArgSpaceVar()
{
#if FEATURE_FIXED_OUT_ARGS
@@ -3532,21 +3971,6 @@ void Compiler::lvaAllocOutgoingArgSpace()
lvaTable[lvaOutgoingArgSpaceVar].lvRefCnt = 1;
lvaTable[lvaOutgoingArgSpaceVar].lvRefCntWtd = BB_UNITY_WEIGHT;
-
- if (lvaOutgoingArgSpaceSize == 0)
- {
- if (compUsesThrowHelper || compIsProfilerHookNeeded())
- {
- // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
- // 1. there are calls to THROW_HEPLPER methods.
- // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
- // that even methods without any calls will have outgoing arg area space allocated.
- //
- // An example for these two cases is Windows Amd64, where the ABI requires to have 4 slots for
- // the outgoing arg space if the method makes any calls.
- lvaOutgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
- }
- }
}
noway_assert(lvaOutgoingArgSpaceVar >= info.compLocalsCount && lvaOutgoingArgSpaceVar < lvaCount);
@@ -5799,10 +6223,14 @@ void Compiler::lvaAlignFrame()
}
// Align the stack with STACK_ALIGN value.
- int adjustFrameSize = compLclFrameSize;
+ int adjustFrameSize = compLclFrameSize;
#if defined(UNIX_X86_ABI)
+ bool isEbpPushed = codeGen->isFramePointerUsed();
+#if DOUBLE_ALIGN
+ isEbpPushed |= genDoubleAlign();
+#endif
// we need to consider spilled register(s) plus return address and/or EBP
- int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
+ int adjustCount = compCalleeRegsPushed + 1 + (isEbpPushed ? 1 : 0);
adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
#endif
if ((adjustFrameSize % STACK_ALIGN) != 0)
@@ -5832,11 +6260,15 @@ void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
//
if (varDsc->lvIsStructField
#ifndef UNIX_AMD64_ABI
+#if !defined(_TARGET_ARM_) || defined(LEGACY_BACKEND)
+ // Non-legacy ARM: lo/hi parts of a promoted long arg need to be updated.
+
// For System V platforms there is no outgoing args space.
// A register passed struct arg is homed on the stack in a separate local var.
// The offset of these structs is already calculated in lvaAssignVirtualFrameOffsetToArg methos.
// Make sure the code below is not executed for these structs and the offset is not changed.
&& !varDsc->lvIsParam
+#endif // !defined(_TARGET_ARM_) || defined(LEGACY_BACKEND)
#endif // UNIX_AMD64_ABI
)
{
@@ -6256,6 +6688,14 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
{
printf(" stack-byref");
}
+ if (varDsc->lvClassHnd != nullptr)
+ {
+ printf(" class-hnd");
+ }
+ if (varDsc->lvClassIsExact)
+ {
+ printf(" exact");
+ }
#ifndef _TARGET_64BIT_
if (varDsc->lvStructDoubleAlign)
printf(" double-align");
diff --git a/src/jit/compatjit/.gitmirror b/src/jit/legacynonjit/.gitmirror
index f507630f94..f507630f94 100644
--- a/src/jit/compatjit/.gitmirror
+++ b/src/jit/legacynonjit/.gitmirror
diff --git a/src/jit/legacyjit/CMakeLists.txt b/src/jit/legacynonjit/CMakeLists.txt
index 73a4600a66..de66d81e8e 100644
--- a/src/jit/legacyjit/CMakeLists.txt
+++ b/src/jit/legacynonjit/CMakeLists.txt
@@ -1,29 +1,34 @@
-project(legacyjit)
+project(legacynonjit)
-add_definitions(-DLEGACY_BACKEND)
add_definitions(-DALT_JIT)
add_definitions(-DFEATURE_NO_HOST)
add_definitions(-DSELF_NO_HOST)
add_definitions(-DFEATURE_READYTORUN_COMPILER)
remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
-# No SIMD in legacy back-end.
remove_definitions(-DFEATURE_SIMD)
remove_definitions(-DFEATURE_AVX_SUPPORT)
+add_definitions(-DLEGACY_BACKEND)
+
+remove_definitions(-D_TARGET_X86_=1)
+add_definitions(-D_TARGET_ARM_)
+set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM_SOURCES})
+
if(WIN32)
- add_definitions(-DFX_VER_INTERNALNAME_STR=legacyjit.dll)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=legacynonjit.dll)
endif(WIN32)
-add_library_clr(legacyjit
+add_library_clr(legacynonjit
SHARED
${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_ALTJIT_SOURCES}
)
-add_dependencies(legacyjit jit_exports)
+add_dependencies(legacynonjit jit_exports)
-set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
-set_property(TARGET legacyjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+set_property(TARGET legacynonjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET legacynonjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
set(RYUJIT_LINK_LIBRARIES
utilcodestaticnohost
@@ -54,9 +59,9 @@ else()
)
endif(CLR_CMAKE_PLATFORM_UNIX)
-target_link_libraries(legacyjit
+target_link_libraries(legacynonjit
${RYUJIT_LINK_LIBRARIES}
)
# add the install targets
-install_clr(legacyjit)
+install_clr(legacynonjit)
diff --git a/src/jit/legacynonjit/legacynonjit.def b/src/jit/legacynonjit/legacynonjit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/legacynonjit/legacynonjit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/linuxnonjit/CMakeLists.txt b/src/jit/linuxnonjit/CMakeLists.txt
new file mode 100644
index 0000000000..87ec1e45c9
--- /dev/null
+++ b/src/jit/linuxnonjit/CMakeLists.txt
@@ -0,0 +1,71 @@
+project(linuxnonjit)
+
+add_definitions(-DALT_JIT)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+if (CLR_CMAKE_PLATFORM_ARCH_I386)
+ remove_definitions(-DFEATURE_SIMD)
+ remove_definitions(-DFEATURE_AVX_SUPPORT)
+ add_definitions(-DUNIX_X86_ABI)
+ set(JIT_ARCH_ALTJIT_SOURCES ${JIT_I386_SOURCES})
+elseif(CLR_CMAKE_PLATFORM_ARCH_AMD64)
+ add_definitions(-DUNIX_AMD64_ABI)
+ add_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING)
+ set(JIT_ARCH_ALTJIT_SOURCES ${JIT_AMD64_SOURCES})
+else()
+ clr_unknown_arch()
+endif()
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=linuxnonjit.dll)
+endif(WIN32)
+
+add_library_clr(linuxnonjit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_ALTJIT_SOURCES}
+)
+
+add_dependencies(linuxnonjit jit_exports)
+
+set_property(TARGET linuxnonjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET linuxnonjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(linuxnonjit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(linuxnonjit)
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index c6663185e4..47950aee63 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -1189,9 +1189,7 @@ class LiveVarAnalysis
}
/* Compute the 'm_liveIn' set */
- VarSetOps::Assign(m_compiler, m_liveIn, m_liveOut);
- VarSetOps::DiffD(m_compiler, m_liveIn, block->bbVarDef);
- VarSetOps::UnionD(m_compiler, m_liveIn, block->bbVarUse);
+ VarSetOps::LivenessD(m_compiler, m_liveIn, block->bbVarDef, block->bbVarUse, m_liveOut);
// Even if block->bbMemoryDef is set, we must assume that it doesn't kill memory liveness from m_memoryLiveOut,
// since (without proof otherwise) the use and def may touch different memory at run-time.
@@ -1218,12 +1216,8 @@ class LiveVarAnalysis
noway_assert(block->bbFlags & BBF_INTERNAL);
- liveInChanged =
- !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveIn, m_liveIn),
- m_liveIn);
- if (liveInChanged ||
- !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveOut, m_liveOut),
- m_liveOut))
+ liveInChanged = !VarSetOps::IsSubset(m_compiler, m_liveIn, block->bbLiveIn);
+ if (liveInChanged || !VarSetOps::IsSubset(m_compiler, m_liveOut, block->bbLiveOut))
{
#ifdef DEBUG
if (m_compiler->verbose)
@@ -1834,7 +1828,7 @@ VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); // Don't kill vars in scope
- noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(VarSetOps::IsSubset(this, keepAliveVars, life));
noway_assert(compCurStmt->gtOper == GT_STMT);
noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
@@ -1882,7 +1876,7 @@ VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock
VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
VarSetOps::UnionD(this, keepAliveVars, block->bbScope); // Don't kill vars in scope
- noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(VarSetOps::IsSubset(this, keepAliveVars, life));
LIR::Range& blockRange = LIR::AsRange(block);
GenTree* firstNonPhiNode = blockRange.FirstNonPhiNode();
@@ -2997,7 +2991,7 @@ void Compiler::fgInterBlockLocalVarLiveness()
// which may expose more dead stores.
fgLocalVarLivenessChanged = true;
- noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, life, block->bbLiveIn), life));
+ noway_assert(VarSetOps::IsSubset(this, life, block->bbLiveIn));
/* set the new bbLiveIn */
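The liveness rewrites above lean on two identities: liveIn = use | (liveOut & ~def), which is what the new VarSetOps::LivenessD computes in one pass, and the fact that Equal(Intersection(a, b), a) is the same test as "a is a subset of b". A sketch using std::bitset rather than the JIT's VarSetOps:

    #include <bitset>

    static const size_t N = 64; // illustrative tracked-variable count

    static std::bitset<N> computeLiveIn(const std::bitset<N>& use,
                                        const std::bitset<N>& def,
                                        const std::bitset<N>& liveOut)
    {
        return use | (liveOut & ~def); // vars used before any def, plus live-out vars not killed here
    }

    static bool isSubset(const std::bitset<N>& a, const std::bitset<N>& b)
    {
        return (a & b) == a; // equivalent to the old Equal(Intersection(a, b), a) pattern
    }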
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 0316a34a21..035f0947c2 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -42,9 +42,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
{
assert(!parentNode->OperIsLeaf());
+ assert(childNode->canBeContained());
+
int srcCount = childNode->gtLsraInfo.srcCount;
assert(srcCount >= 0);
m_lsra->clearOperandCounts(childNode);
+
assert(parentNode->gtLsraInfo.srcCount > 0);
parentNode->gtLsraInfo.srcCount += srcCount - 1;
}
@@ -465,7 +468,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
// both GT_SWITCH lowering code paths.
// This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
- comp->gtNewIconNode(jumpCnt - 2, TYP_INT));
+ comp->gtNewIconNode(jumpCnt - 2, tempLclType));
// Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
// is now less than zero 0 (that would also hit the default case).
@@ -678,9 +681,16 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
+ GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType);
+#ifdef _TARGET_64BIT_
+ if (tempLclType != TYP_I_IMPL)
+ {
+ // Note that the switch value is unsigned so the cast should be unsigned as well.
+ switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, TYP_U_IMPL);
+ }
+#endif
GenTreePtr gtTableSwitch =
- comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, comp->gtNewLclvNode(tempLclNum, tempLclType),
- comp->gtNewJmpTableNode());
+ comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, comp->gtNewJmpTableNode());
/* Increment the lvRefCnt and lvRefCntWtd for temp */
tempVarDsc->incRefCnts(blockWeight, comp);
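The reason the added cast uses TYP_U_IMPL: on a 64-bit target the 32-bit switch value must be zero-extended, not sign-extended, before it indexes the jump table. Roughly:

    #include <cstdint>

    static const void* pickTargetSketch(const void* const* jumpTable, uint32_t switchValue)
    {
        // Unsigned widening, matching the TYP_U_IMPL cast above; a sign extension of a value
        // such as 0x80000001 would produce a huge index and read outside the table.
        uint64_t index = (uint64_t)switchValue;
        return jumpTable[index];
    }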
@@ -930,23 +940,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
// This provides the info to put this argument in in-coming arg area slot
// instead of in out-going arg area slot.
- PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
- // correct
+ PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct
-#if FEATURE_FASTTAILCALL
putArg = new (comp, GT_PUTARG_STK)
GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
- call->IsFastTailCall() DEBUGARG(call));
-#else
- putArg = new (comp, GT_PUTARG_STK)
- GenTreePutArgStk(GT_PUTARG_STK, type, arg,
- info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
-#endif
-
-#if defined(UNIX_X86_ABI)
- assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
- putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
-#endif
+ call->IsFastTailCall(), call);
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
@@ -971,6 +969,43 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
assert(!varTypeIsSIMD(arg));
numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+
+#ifdef _TARGET_X86_
+            // On x86 the VM lies about the type of a struct containing a pointer-sized
+            // integer field by returning the type of its field as the type of the struct.
+            // Such a struct can be passed in a register depending on its position in the
+            // parameter list. The VM does this unwrapping only one level deep, and therefore
+            // a type like struct Foo { struct Bar { int f } } always needs to be
+            // passed on the stack. Also, the VM doesn't lie about the type of such a struct
+            // when it is a field of another struct; that is, the VM doesn't lie about
+            // the type of Foo.Bar.
+ //
+ // We now support the promotion of fields that are of type struct.
+ // However we only support a limited case where the struct field has a
+            // single field and that single field must be a scalar type. Say the Foo.Bar
+            // field is getting passed as a parameter to a call. Since it is a TYP_STRUCT,
+            // as per the x86 ABI it should always be passed on the stack. Therefore the GenTree
+            // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where
+            // local v1 could be a promoted field standing for Foo.Bar. Note that
+            // the type of v1 will be the type of the field Foo.Bar.f when Foo is
+            // promoted. That is, v1 will be a scalar type. In this case we need to
+            // pass v1 on the stack instead of in a register.
+ //
+ // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is
+ // a scalar type and the width of GT_OBJ matches the type size of v1.
+ // Note that this cannot be done till call node arguments are morphed
+ // because we should not lose the fact that the type of argument is
+ // a struct so that the arg gets correctly marked to be passed on stack.
+ GenTree* objOp1 = arg->gtGetOp1();
+ if (objOp1->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum();
+ if (comp->lvaTable[lclNum].lvType != TYP_STRUCT)
+ {
+ comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr));
+ }
+ }
+#endif // _TARGET_X86_
}
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
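In illustrative form, the struct shape the comment above describes (names are examples only):

    struct Bar { int f; };   // single scalar field; per the comment, the VM reports Bar itself as 'int'
    struct Foo { Bar bar; }; // but Foo.bar, being a field of another struct, stays TYP_STRUCT
    // When a promoted local v1 standing in for Foo.bar feeds a PUTARG_STK as
    // GT_OBJ(GT_LCL_VAR_ADDR(v1)), v1 is kept on the stack via DNER_VMNeedsStackAddr above.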
@@ -1062,6 +1097,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
LclVarDsc* varDsc = &comp->lvaTable[varNum];
type = varDsc->lvType;
}
+ else if (arg->OperGet() == GT_SIMD)
+ {
+ assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12));
+
+ if (arg->AsSIMD()->gtSIMDSize == 12)
+ {
+ type = TYP_SIMD12;
+ }
+ }
}
#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
@@ -1075,25 +1119,41 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
{
if (isReg)
{
- NYI("Lowering of long register argument");
- }
+ noway_assert(arg->OperGet() == GT_LONG);
+ assert(info->numRegs == 2);
+
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+
+ GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
+ (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
- // Although the hi argument needs to be pushed first, that will be handled by the general case,
- // in which the fields will be reversed.
- noway_assert(arg->OperGet() == GT_LONG);
- assert(info->numSlots == 2);
- GenTreePtr argLo = arg->gtGetOp1();
- GenTreePtr argHi = arg->gtGetOp2();
- GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
- // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
- (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
- putArg = NewPutArg(call, fieldList, info, TYP_VOID);
-
- // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
- BlockRange().InsertBefore(arg, fieldList, putArg);
- BlockRange().Remove(arg);
- *ppArg = putArg;
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+
+ BlockRange().InsertBefore(arg, putArg);
+ BlockRange().Remove(arg);
+ *ppArg = fieldList;
+ info->node = fieldList;
+ }
+ else
+ {
+ // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
+ // Although the hi argument needs to be pushed first, that will be handled by the general case,
+ // in which the fields will be reversed.
+ noway_assert(arg->OperGet() == GT_LONG);
+ assert(info->numSlots == 2);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+ GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
+ // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
+ (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+
+ // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
+ BlockRange().InsertBefore(arg, fieldList, putArg);
+ BlockRange().Remove(arg);
+ *ppArg = putArg;
+ }
}
else
#endif // !defined(_TARGET_64BIT_)
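For the register case added above, the GT_LONG is rebuilt as a two-entry GT_FIELD_LIST: the low 32 bits at offset 0 and the high 32 bits at offset 4 (lo first). A standalone sketch of that decomposition:

    #include <cstdint>

    struct LongFieldsSketch
    {
        uint32_t lo; // field at offset 0
        uint32_t hi; // field at offset 4
    };

    static LongFieldsSketch splitLong(uint64_t value)
    {
        LongFieldsSketch f;
        f.lo = (uint32_t)(value & 0xFFFFFFFFu); // low half, first entry in the field list
        f.hi = (uint32_t)(value >> 32);         // high half
        return f;
    }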
@@ -1187,9 +1247,6 @@ void Lowering::LowerCall(GenTree* node)
LowerArgsForCall(call);
-// RyuJIT arm is not set up for lowered call control
-#ifndef _TARGET_ARM_
-
// note that everything generated from this point on runs AFTER the outgoing args are placed
GenTree* result = nullptr;
@@ -1294,7 +1351,6 @@ void Lowering::LowerCall(GenTree* node)
call->gtControlExpr = result;
}
-#endif //!_TARGET_ARM_
if (comp->opts.IsJit64Compat())
{
@@ -2196,7 +2252,6 @@ void Lowering::LowerCompare(GenTree* cmp)
// automatically inserts a cast from int32 to long on 64 bit architectures. However, the JIT
// accidentally generates int/long comparisons internally:
// - loop cloning compares int (and even small int) index limits against long constants
- // - switch lowering compares a 64 bit switch value against a int32 constant
//
// TODO-Cleanup: The above mentioned issues should be fixed and then the code below may be
// replaced with an assert or at least simplified. The special casing of constants in code
@@ -2487,7 +2542,7 @@ GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
GenTree* indir = Ind(cellAddr);
#ifdef FEATURE_READYTORUN_COMPILER
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARMARCH_)
// For arm64, we dispatch code same as VSD using X11 for indirection cell address,
// which ZapIndirectHelperThunk expects.
if (call->IsR2RRelativeIndir())
@@ -2780,6 +2835,9 @@ void Lowering::InsertPInvokeMethodProlog()
JITDUMP("======= Inserting PInvoke method prolog\n");
+ // The first BB must be a scratch BB in order for us to be able to safely insert the P/Invoke prolog.
+ assert(comp->fgFirstBBisScratch());
+
LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
@@ -2795,11 +2853,11 @@ void Lowering::InsertPInvokeMethodProlog()
// for x86, don't pass the secretArg.
CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
-#else // !_TARGET_X86_
+#else
GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
-#endif // !_TARGET_X86_
+#endif
GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
@@ -2814,14 +2872,13 @@ void Lowering::InsertPInvokeMethodProlog()
store->gtOp.gtOp1 = call;
store->gtFlags |= GTF_VAR_DEF;
- GenTree* insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
-
comp->fgMorphTree(store);
- firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
+ firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, store));
DISPTREERANGE(firstBlockRange, store);
-#ifndef _TARGET_X86_ // For x86, this step is done at the call site (due to stack pointer not being static in the
- // function).
+#if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
+ // For x86, this step is done at the call site (due to stack pointer not being static in the function).
+ // For arm32, CallSiteSP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
// --------------------------------------------------------
// InlinedCallFrame.m_pCallSiteSP = @RSP;
@@ -2830,10 +2887,13 @@ void Lowering::InsertPInvokeMethodProlog()
GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
storeSP->gtOp1 = PhysReg(REG_SPBASE);
- firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
+ firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeSP));
DISPTREERANGE(firstBlockRange, storeSP);
-#endif // !_TARGET_X86_
+#endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
+
+#if !defined(_TARGET_ARM_)
+ // For arm32, CalleeSavedFP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
// --------------------------------------------------------
// InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
@@ -2843,8 +2903,9 @@ void Lowering::InsertPInvokeMethodProlog()
callFrameInfo.offsetOfCalleeSavedFP);
storeFP->gtOp1 = PhysReg(REG_FPBASE);
- firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
+ firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, storeFP));
DISPTREERANGE(firstBlockRange, storeFP);
+#endif // !defined(_TARGET_ARM_)
// --------------------------------------------------------
// On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
@@ -2857,7 +2918,7 @@ void Lowering::InsertPInvokeMethodProlog()
// Push a frame - if we are NOT in an IL stub, this is done right before the call
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack
GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
- firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ firstBlockRange.InsertAtEnd(LIR::SeqTree(comp, frameUpd));
DISPTREERANGE(firstBlockRange, frameUpd);
}
#endif // _TARGET_64BIT_
@@ -2964,7 +3025,6 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
-#if COR_JIT_EE_VERSION > 460
if (comp->opts.ShouldUsePInvokeHelpers())
{
// First argument is the address of the frame variable.
@@ -2980,7 +3040,6 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
LowerNode(helperCall); // helper call is inserted before current node and should be lowered here.
return;
}
-#endif
// Emit the following sequence:
//
@@ -3113,7 +3172,6 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
{
JITDUMP("======= Inserting PInvoke call epilog\n");
-#if COR_JIT_EE_VERSION > 460
if (comp->opts.ShouldUsePInvokeHelpers())
{
noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
@@ -3131,7 +3189,6 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
return;
}
-#endif
// gcstate = 1
GenTree* insertionPoint = call->gtNext;
@@ -3252,18 +3309,7 @@ GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
CORINFO_CONST_LOOKUP lookup;
-#if COR_JIT_EE_VERSION > 460
comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
-#else
- void* pIndirection;
- lookup.accessType = IAT_PVALUE;
- lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
- if (lookup.addr == nullptr)
- {
- lookup.accessType = IAT_PPVALUE;
- lookup.addr = pIndirection;
- }
-#endif
void* addr = lookup.addr;
switch (lookup.accessType)
@@ -4381,6 +4427,14 @@ void Lowering::DoPhase()
#endif
#endif
+ // If we have any PInvoke calls, insert the one-time prolog code. We'll insert the epilog code in the
+ // appropriate spots later. NOTE: there is a minor optimization opportunity here, as we still create p/invoke
+ // data structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodProlog();
+ }
+
#if !defined(_TARGET_64BIT_)
DecomposeLongs decomp(comp); // Initialize the long decomposition class.
decomp.PrepareForDecomposition();
@@ -4398,14 +4452,6 @@ void Lowering::DoPhase()
LowerBlock(block);
}
- // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
- // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
- // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
- if (comp->info.compCallUnmanaged)
- {
- InsertPInvokeMethodProlog();
- }
-
#ifdef DEBUG
JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
if (VERBOSE)
@@ -4558,13 +4604,6 @@ void Lowering::CheckCallArg(GenTree* arg)
switch (arg->OperGet())
{
-#if !defined(_TARGET_64BIT_)
- case GT_LONG:
- assert(arg->gtGetOp1()->OperIsPutArg());
- assert(arg->gtGetOp2()->OperIsPutArg());
- break;
-#endif
-
case GT_FIELD_LIST:
{
GenTreeFieldList* list = arg->AsFieldList();
diff --git a/src/jit/lower.h b/src/jit/lower.h
index 57b4127f26..bcc2bafdab 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -195,6 +195,8 @@ private:
void TreeNodeInfoInitStoreLoc(GenTree* tree);
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
+ void TreeNodeInfoInitPutArgReg(
+ GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs);
void TreeNodeInfoInitCall(GenTreeCall* call);
void TreeNodeInfoInitCmp(GenTreePtr tree);
void TreeNodeInfoInitStructArg(GenTreePtr structArg);
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
index 9792b8a9c6..0701520b0a 100644
--- a/src/jit/lowerarm.cpp
+++ b/src/jit/lowerarm.cpp
@@ -31,189 +31,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
-// LowerStoreLoc: Lower a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Widening operations of unsigneds.
-//
-void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
-{
- // Try to widen the ops if they are going into a local var.
- GenTree* op1 = storeLoc->gtGetOp1();
- if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
- {
- GenTreeIntCon* con = op1->AsIntCon();
- ssize_t ival = con->gtIconVal;
- unsigned varNum = storeLoc->gtLclNum;
- LclVarDsc* varDsc = comp->lvaTable + varNum;
-
- if (varDsc->lvIsSIMDType())
- {
- noway_assert(storeLoc->gtType != TYP_STRUCT);
- }
- unsigned size = genTypeSize(storeLoc);
- // If we are storing a constant into a local variable
- // we extend the size of the store here
- if ((size < 4) && !varTypeIsStruct(varDsc))
- {
- if (!varTypeIsUnsigned(varDsc))
- {
- if (genTypeSize(storeLoc) == 1)
- {
- if ((ival & 0x7f) != ival)
- {
- ival = ival | 0xffffff00;
- }
- }
- else
- {
- assert(genTypeSize(storeLoc) == 2);
- if ((ival & 0x7fff) != ival)
- {
- ival = ival | 0xffff0000;
- }
- }
- }
-
- // A local stack slot is at least 4 bytes in size, regardless of
- // what the local var is typed as, so auto-promote it here
- // unless it is a field of a promoted struct
- // TODO-ARM-CQ: if the field is promoted shouldn't we also be able to do this?
- if (!varDsc->lvIsStructField)
- {
- storeLoc->gtType = TYP_INT;
- con->SetIconValue(ival);
- }
- }
- }
-}
-
-//------------------------------------------------------------------------
-// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
-//
-// Arguments:
-// tree - GT_CAST node to be lowered
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Casts from small int type to float/double are transformed as follows:
-// GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
-// GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
-// GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
-// GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
-//
-// Similarly casts from float/double to a smaller int type are transformed as follows:
-// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
-// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
-// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16)
-// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16)
-//
-// Note that for the overflow conversions we still depend on helper calls and
-// don't expect to see them here.
-// i) GT_CAST(float/double, int type with overflow detection)
-//
-void Lowering::LowerCast(GenTree* tree)
-{
- assert(tree->OperGet() == GT_CAST);
-
- JITDUMP("LowerCast for: ");
- DISPNODE(tree);
- JITDUMP("\n");
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- var_types dstType = tree->CastToType();
- var_types srcType = op1->TypeGet();
- var_types tmpType = TYP_UNDEF;
-
- if (varTypeIsFloating(srcType))
- {
- noway_assert(!tree->gtOverflow());
- }
-
- // Case of src is a small type and dst is a floating point type.
- if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
- {
- NYI_ARM("Lowering for cast from small type to float"); // Not tested yet.
- // These conversions can never be overflow detecting ones.
- noway_assert(!tree->gtOverflow());
- tmpType = TYP_INT;
- }
- // case of src is a floating point type and dst is a small type.
- else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
- {
- NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.
- tmpType = TYP_INT;
- }
-
- if (tmpType != TYP_UNDEF)
- {
- GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
- tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
-
- tree->gtFlags &= ~GTF_UNSIGNED;
- tree->gtOp.gtOp1 = tmp;
- BlockRange().InsertAfter(op1, tmp);
- }
-}
-
-//------------------------------------------------------------------------
-// LowerRotate: Lower GT_ROL and GT_ROL nodes.
-//
-// Arguments:
-// tree - the node to lower
-//
-// Return Value:
-// None.
-//
-void Lowering::LowerRotate(GenTreePtr tree)
-{
- if (tree->OperGet() == GT_ROL)
- {
- // There is no ROL instruction on ARM. Convert ROL into ROR.
- GenTreePtr rotatedValue = tree->gtOp.gtOp1;
- unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
- GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
-
- if (rotateLeftIndexNode->IsCnsIntOrI())
- {
- ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
- ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
- rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
- }
- else
- {
- GenTreePtr tmp =
- comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
- BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
- tree->gtOp.gtOp2 = tmp;
- }
- tree->ChangeOper(GT_ROR);
- }
-}
-
-//------------------------------------------------------------------------
-// LowerPutArgStk: Lower a GT_PUTARG_STK node
-//
-// Arguments:
-// argNode - a GT_PUTARG_STK node
-//
-// Return Value:
-// None.
-//
-// Notes:
-// There is currently no Lowering required for this on ARM.
-//
-void Lowering::LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
-{
-}
-
-//------------------------------------------------------------------------
// IsCallTargetInRange: Can a call target address be encoded in-place?
//
// Return Value:
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index f5bc55e10c..b24ed8221c 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -29,304 +29,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "sideeffects.h"
#include "lower.h"
-//------------------------------------------------------------------------
-// LowerStoreLoc: Lower a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Widening operations of unsigneds.
-
-void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
-{
- // Try to widen the ops if they are going into a local var.
- GenTree* op1 = storeLoc->gtGetOp1();
- if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
- {
- GenTreeIntCon* con = op1->AsIntCon();
- ssize_t ival = con->gtIconVal;
- unsigned varNum = storeLoc->gtLclNum;
- LclVarDsc* varDsc = comp->lvaTable + varNum;
-
- if (varDsc->lvIsSIMDType())
- {
- noway_assert(storeLoc->gtType != TYP_STRUCT);
- }
- unsigned size = genTypeSize(storeLoc);
- // If we are storing a constant into a local variable
- // we extend the size of the store here
- if ((size < 4) && !varTypeIsStruct(varDsc))
- {
- if (!varTypeIsUnsigned(varDsc))
- {
- if (genTypeSize(storeLoc) == 1)
- {
- if ((ival & 0x7f) != ival)
- {
- ival = ival | 0xffffff00;
- }
- }
- else
- {
- assert(genTypeSize(storeLoc) == 2);
- if ((ival & 0x7fff) != ival)
- {
- ival = ival | 0xffff0000;
- }
- }
- }
-
- // A local stack slot is at least 4 bytes in size, regardless of
- // what the local var is typed as, so auto-promote it here
- // unless it is a field of a promoted struct
- // TODO-ARM64-CQ: if the field is promoted shouldn't we also be able to do this?
- if (!varDsc->lvIsStructField)
- {
- storeLoc->gtType = TYP_INT;
- con->SetIconValue(ival);
- }
- }
- }
-}
-
-//------------------------------------------------------------------------
-// LowerBlockStore: Set block store type
-//
-// Arguments:
-// blkNode - The block store node of interest
-//
-// Return Value:
-// None.
-//
-
-void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- Compiler* compiler = comp;
-
- // Sources are dest address and initVal or source.
- GenTreePtr srcAddrOrFill = nullptr;
- bool isInitBlk = blkNode->OperIsInitBlkOp();
-
- if (!isInitBlk)
- {
- // CopyObj or CopyBlk
- if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
- if (source->gtOper == GT_IND)
- {
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
- }
- }
-
- if (isInitBlk)
- {
- GenTreePtr initVal = source;
- if (initVal->OperIsInitVal())
- {
- initVal = initVal->gtGetOp1();
- }
- srcAddrOrFill = initVal;
-
-#if 0
- // TODO-ARM64-CQ: Currently we generate a helper call for every
- // initblk we encounter. Later on we should implement loop unrolling
- // code sequences to improve CQ.
- // For reference see the code in LowerXArch.cpp.
- if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
- {
- // The fill value of an initblk is interpreted to hold a
- // value of (unsigned int8) however a constant of any size
- // may practically reside on the evaluation stack. So extract
- // the lower byte out of the initVal constant and replicate
- // it to a larger constant whose size is sufficient to support
- // the largest width store of the desired inline expansion.
-
- ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
- if (size < REGSIZE_BYTES)
- {
- initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
- }
- else
- {
- initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
- initVal->gtType = TYP_LONG;
- }
- initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
- }
- else
-#endif // 0
- {
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
- }
- }
- else
- {
- // CopyObj or CopyBlk
- // Sources are src and dest and size if not constant.
-
- if (blkNode->OperGet() == GT_STORE_OBJ)
- {
- // CopyObj
-
- GenTreeObj* objNode = blkNode->AsObj();
-
- unsigned slots = objNode->gtSlots;
-
-#ifdef DEBUG
- // CpObj must always have at least one GC-Pointer as a member.
- assert(objNode->gtGcPtrCount > 0);
-
- assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
-
- CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass;
- size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
- size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
-
- // Currently, the EE always round up a class data structure so
- // we are not handling the case where we have a non multiple of pointer sized
- // struct. This behavior may change in the future so in order to keeps things correct
- // let's assert it just to be safe. Going forward we should simply
- // handle this case.
- assert(classSize == blkSize);
- assert((blkSize / TARGET_POINTER_SIZE) == slots);
- assert(objNode->HasGCPtr());
-#endif
-
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
- }
- else
- {
- // CopyBlk
- short internalIntCount = 0;
- regMaskTP internalIntCandidates = RBM_NONE;
-
-#if 0
- // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
- // we should unroll the loop to improve CQ.
- // For reference see the code in lowerxarch.cpp.
-
- // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
-
- if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
- {
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
- }
- else
-#endif // 0
- {
- // In case we have a constant integer this means we went beyond
- // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of
- // any GC-Pointers in the src struct.
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
- }
- }
- }
-}
-
-/* Lower GT_CAST(srcType, DstType) nodes.
- *
- * Casts from small int type to float/double are transformed as follows:
- * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
- * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
- * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
- * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
- *
- * SSE2 conversion instructions operate on signed integers. casts from Uint32/Uint64
- * are morphed as follows by front-end and hence should not be seen here.
- * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
- * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
- *
- *
- * Similarly casts from float/double to a smaller int type are transformed as follows:
- * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
- * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
- * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(double/double, int32), int16)
- * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(double/double, int32), uint16)
- *
- * SSE2 has instructions to convert a float/double vlaue into a signed 32/64-bit
- * integer. The above transformations help us to leverage those instructions.
- *
- * Note that for the overflow conversions we still depend on helper calls and
- * don't expect to see them here.
- * i) GT_CAST(float/double, int type with overflow detection)
- *
- */
-void Lowering::LowerCast(GenTree* tree)
-{
- assert(tree->OperGet() == GT_CAST);
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- var_types dstType = tree->CastToType();
- var_types srcType = op1->TypeGet();
- var_types tmpType = TYP_UNDEF;
-
- // We should never see the following casts as they are expected to be lowered
- // apropriately or converted into helper calls by front-end.
- // srcType = float/double dstType = * and overflow detecting cast
- // Reason: must be converted to a helper call
- //
- if (varTypeIsFloating(srcType))
- {
- noway_assert(!tree->gtOverflow());
- }
-
- // Case of src is a small type and dst is a floating point type.
- if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
- {
- // These conversions can never be overflow detecting ones.
- noway_assert(!tree->gtOverflow());
- tmpType = TYP_INT;
- }
- // case of src is a floating point type and dst is a small type.
- else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
- {
- tmpType = TYP_INT;
- }
-
- if (tmpType != TYP_UNDEF)
- {
- GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
- tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
-
- tree->gtFlags &= ~GTF_UNSIGNED;
- tree->gtOp.gtOp1 = tmp;
- BlockRange().InsertAfter(op1, tmp);
- }
-}
-
-void Lowering::LowerRotate(GenTreePtr tree)
-{
- if (tree->OperGet() == GT_ROL)
- {
- // There is no ROL instruction on ARM. Convert ROL into ROR.
- GenTreePtr rotatedValue = tree->gtOp.gtOp1;
- unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
- GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
-
- if (rotateLeftIndexNode->IsCnsIntOrI())
- {
- ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
- ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
- rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
- }
- else
- {
- GenTreePtr tmp =
- comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
- BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
- tree->gtOp.gtOp2 = tmp;
- }
- tree->ChangeOper(GT_ROR);
- }
-}
-
// returns true if the tree can use the read-modify-write memory instruction form
bool Lowering::isRMWRegOper(GenTreePtr tree)
{
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
new file mode 100644
index 0000000000..4ff3552eb0
--- /dev/null
+++ b/src/jit/lowerarmarch.cpp
@@ -0,0 +1,346 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM and ARM64 common code XX
+XX XX
+XX This encapsulates common logic for lowering trees for the ARM and ARM64 XX
+XX architectures. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Widening operations of unsigneds.
+//
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ // Try to widen the ops if they are going into a local var.
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
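
The constant-widening rule above is easiest to see in isolation. Below is a minimal standalone sketch (plain C++, not JIT code; WidenSmallSignedConstant is a made-up name): when a small signed constant is stored to a 4-byte stack slot, the upper bytes are filled with the sign bit so the widened store observes the same value the narrow store would have produced.

    #include <cassert>
    #include <cstdint>

    static int32_t WidenSmallSignedConstant(int32_t ival, unsigned sizeInBytes)
    {
        if (sizeInBytes == 1)
        {
            if ((ival & 0x7f) != ival)   // value does not fit in the positive byte range
                ival |= (int32_t)0xffffff00;
        }
        else
        {
            assert(sizeInBytes == 2);
            if ((ival & 0x7fff) != ival)
                ival |= (int32_t)0xffff0000;
        }
        return ival;
    }

    int main()
    {
        assert(WidenSmallSignedConstant(0xFF, 1) == -1);                    // (int8_t)0xFF == -1
        assert(WidenSmallSignedConstant(0x7F, 1) == 0x7F);                  // already fits
        assert(WidenSmallSignedConstant(0x8000, 2) == (int32_t)0xffff8000); // == -32768
        return 0;
    }
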
+
+//------------------------------------------------------------------------
+// LowerBlockStore: Set block store type
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ if (!isInitBlk)
+ {
+ // CopyObj or CopyBlk
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ }
+ }
+
+ if (isInitBlk)
+ {
+ GenTreePtr initVal = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
+
+#ifdef _TARGET_ARM64_
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
+ {
+ // TODO-ARM-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in LowerXArch.cpp.
+ NYI_ARM("initblk loop unrolling is currently not implemented.");
+
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+#endif // _TARGET_ARM64_
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ NYI_ARM("Lowering for GT_STORE_OBJ isn't implemented");
+
+#ifdef _TARGET_ARM64_
+
+ GenTreeObj* objNode = blkNode->AsObj();
+
+ unsigned slots = objNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(objNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass;
+ size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure so
+ // we are not handling the case where the struct size is not a multiple of the
+ // pointer size. This behavior may change in the future, so in order to keep things
+ // correct let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(objNode->HasGCPtr());
+#endif
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ // CopyBlk
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+#ifdef _TARGET_ARM64_
+ // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+ // we should unroll the loop to improve CQ.
+ // For reference see the code in lowerxarch.cpp.
+ // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented.
+
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+#endif // _TARGET_ARM64_
+ {
+ // In case we have a constant integer this means we went beyond
+ // CPBLK_UNROLL_LIMIT bytes of size, still we should never have the case of
+ // any GC-Pointers in the src struct.
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+ }
+ }
+}
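
The interesting arithmetic in the init-block path above is the fill-byte replication: the low byte of the init value is broadcast to every byte of a register-sized constant so that wider stores can be used when the block is unrolled. A minimal standalone sketch (plain C++, not JIT code; ReplicateFillByte is a made-up name):

    #include <cassert>
    #include <cstdint>

    // Broadcast the low byte of initVal across a 4- or 8-byte store-sized constant.
    static uint64_t ReplicateFillByte(uint64_t initVal, unsigned storeSizeInBytes)
    {
        uint64_t fill = initVal & 0xFF;
        return (storeSizeInBytes <= 4) ? 0x01010101u * fill : 0x0101010101010101ull * fill;
    }

    int main()
    {
        assert(ReplicateFillByte(0xAB, 4) == 0xABABABABull);
        assert(ReplicateFillByte(0xAB, 8) == 0xABABABABABABABABull);
        return 0;
    }
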
+
+//------------------------------------------------------------------------
+// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
+//
+// Arguments:
+// tree - GT_CAST node to be lowered
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Casts from small int type to float/double are transformed as follows:
+// GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+// GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+// GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+// GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+//
+// Similarly casts from float/double to a smaller int type are transformed as follows:
+// GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+// GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+// GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+// GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+//
+// Note that for the overflow conversions we still depend on helper calls and
+// don't expect to see them here.
+// i) GT_CAST(float/double, int type with overflow detection)
+//
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ JITDUMP("LowerCast for: ");
+ DISPNODE(tree);
+ JITDUMP("\n");
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ }
+
+ // Case of src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ NYI_ARM("Lowering for cast from small type to float"); // Not tested yet.
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // case of src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ NYI_ARM("Lowering for cast from float to small type"); // Not tested yet.
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
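
A minimal standalone sketch (plain C++, not JIT code) of the cast decomposition documented above: converting through an int32 intermediate produces the same result as the direct conversion between a small integer type and float/double.

    #include <cassert>
    #include <cstdint>

    int main()
    {
        int8_t sb = -5;
        double viaInt32 = (double)(int32_t)sb;        // GT_CAST(GT_CAST(sbyte, int32), double)
        assert(viaInt32 == (double)sb);               // same as the direct cast

        double d = 300.75;
        int16_t narrowed = (int16_t)(int32_t)d;       // GT_CAST(GT_CAST(double, int32), int16)
        assert(narrowed == (int16_t)d);               // same as the direct (non-overflowing) cast
        return 0;
    }
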
+
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
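
A minimal standalone sketch (plain C++, not JIT code; Rol32/Ror32 are made-up helpers) of the rotate rewrite above: ARM has no ROL instruction, so rol(x, n) is computed as ror(x, bitSize - n) when the rotate count is a constant, or as ror by the negated count otherwise.

    #include <cassert>
    #include <cstdint>

    static uint32_t Ror32(uint32_t x, uint32_t n) { n &= 31; return (x >> n) | (x << ((32 - n) & 31)); }
    static uint32_t Rol32(uint32_t x, uint32_t n) { n &= 31; return (x << n) | (x >> ((32 - n) & 31)); }

    int main()
    {
        for (uint32_t n = 0; n < 32; n++)
        {
            assert(Rol32(0xDEADBEEF, n) == Ror32(0xDEADBEEF, 32 - n));             // constant count
            assert(Rol32(0xDEADBEEF, n) == Ror32(0xDEADBEEF, (uint32_t)(0u - n))); // negated count
        }
        return 0;
    }
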
+
+#endif // _TARGET_ARMARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index ac76e29364..e7c1c839d1 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -39,9 +39,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Overview (doLinearScan):
- Walk all blocks, building intervals and RefPositions (buildIntervals)
- - Traverse the RefPositions, marking last uses (setLastUses)
- - Note that this is necessary because the execution order doesn't accurately reflect use order.
- There is a "TODO-Throughput" to eliminate this.
- Allocate registers (allocateRegisters)
- Annotate nodes with register assignments (resolveRegisters)
- Add move nodes as needed to resolve conflicting register
@@ -723,12 +720,25 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp)
applyCalleeSaveHeuristics(rp);
- // Ensure that we have consistent def/use on SDSU temps.
- // However, in the case of a non-commutative rmw def, we must avoid over-constraining
- // the def, so don't propagate a single-register restriction from the consumer to the producer
+ if (theInterval->isLocalVar)
+ {
+ if (RefTypeIsUse(rp->refType))
+ {
+ RefPosition* const prevRP = theInterval->recentRefPosition;
+ if ((prevRP != nullptr) && (prevRP->bbNum == rp->bbNum))
+ {
+ prevRP->lastUse = false;
+ }
+ }
- if (RefTypeIsUse(rp->refType) && !theInterval->isLocalVar)
+ rp->lastUse = (rp->refType != RefTypeExpUse) && (rp->refType != RefTypeParamDef) &&
+ (rp->refType != RefTypeZeroInit) && !extendLifetimes();
+ }
+ else if (rp->refType == RefTypeUse)
{
+ // Ensure that we have consistent def/use on SDSU temps.
+ // However, in the case of a non-commutative rmw def, we must avoid over-constraining
+ // the def, so don't propagate a single-register restriction from the consumer to the producer
RefPosition* prevRefPosition = theInterval->recentRefPosition;
assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition);
regMaskTP prevAssignment = prevRefPosition->registerAssignment;
@@ -744,6 +754,8 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp)
{
theInterval->hasConflictingDefUse = true;
}
+
+ rp->lastUse = true;
}
}
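
The rewritten local-variable path above maintains last-use information incrementally: each use tentatively marks itself as a last use and clears the bit on the variable's previous reference in the same block, so after the walk only the final reference of each variable is still marked. A minimal standalone sketch (plain C++, not JIT code; RefPos is a simplified stand-in for RefPosition):

    #include <cassert>
    #include <map>
    #include <vector>

    struct RefPos { int varNum; bool lastUse; };

    int main()
    {
        // Uses of two tracked variables (V00 and V01) in execution order within one block.
        std::vector<RefPos> refs = {{0, false}, {1, false}, {0, false}, {1, false}, {0, false}};
        std::map<int, RefPos*> recentRef;   // per-variable most recent reference

        for (RefPos& rp : refs)
        {
            RefPos* prev = recentRef[rp.varNum];
            if (prev != nullptr)
                prev->lastUse = false;      // an earlier use is no longer the last one
            rp.lastUse = true;              // tentatively the last use; cleared if a later use appears
            recentRef[rp.varNum] = &rp;
        }

        assert(!refs[0].lastUse && !refs[2].lastUse && refs[4].lastUse);   // V00: only the final use
        assert(!refs[1].lastUse && refs[3].lastUse);                       // V01: only the final use
        return 0;
    }
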
@@ -2486,16 +2498,15 @@ RefType refTypeForLocalRefNode(GenTree* node)
// being set by dataflow analysis. It is necessary to do it this way only because the execution
// order wasn't strictly correct.
-void LinearScan::setLastUses(BasicBlock* block)
-{
#ifdef DEBUG
+void LinearScan::checkLastUses(BasicBlock* block)
+{
if (VERBOSE)
{
- JITDUMP("\n\nCALCULATING LAST USES for block %u, liveout=", block->bbNum);
+ JITDUMP("\n\nCHECKING LAST USES for block %u, liveout=", block->bbNum);
dumpConvertedVarSet(compiler, block->bbLiveOut);
JITDUMP("\n==============================\n");
}
-#endif // DEBUG
unsigned keepAliveVarNum = BAD_VAR_NUM;
if (compiler->lvaKeepAliveAndReportThis())
@@ -2513,8 +2524,8 @@ void LinearScan::setLastUses(BasicBlock* block)
VARSET_TP VARSET_INIT(compiler, temp, block->bbLiveOut);
+ bool foundDiff = false;
auto currentRefPosition = refPositions.rbegin();
-
while (currentRefPosition->refType != RefTypeBB)
{
// We should never see ParamDefs or ZeroInits within a basic block.
@@ -2523,42 +2534,46 @@ void LinearScan::setLastUses(BasicBlock* block)
{
unsigned varNum = currentRefPosition->getInterval()->varNum;
unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
+
+ LsraLocation loc = currentRefPosition->nodeLocation;
+
// We should always have a tree node for a localVar, except for the "special" RefPositions.
GenTreePtr tree = currentRefPosition->treeNode;
assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
currentRefPosition->refType == RefTypeDummyDef);
+
if (!VarSetOps::IsMember(compiler, temp, varIndex) && varNum != keepAliveVarNum)
{
- // There was no exposed use, so this is a
- // "last use" (and we mark it thus even if it's a def)
+ // There was no exposed use, so this is a "last use" (and we mark it thus even if it's a def)
- if (tree != nullptr)
+ if (extendLifetimes())
{
- tree->gtFlags |= GTF_VAR_DEATH;
- }
- LsraLocation loc = currentRefPosition->nodeLocation;
-#ifdef DEBUG
- if (getLsraExtendLifeTimes())
- {
- JITDUMP("last use of V%02u @%u (not marked as last use for LSRA due to extendLifetimes stress "
- "option)\n",
- compiler->lvaTrackedToVarNum[varIndex], loc);
+ // NOTE: this is a bit of a hack. When extending lifetimes, the "last use" bit will be clear.
+ // This bit, however, would normally be used during resolveLocalRef to set the value of
+ // GTF_VAR_DEATH on the node for a ref position. If this bit is not set correctly even when
+ // extending lifetimes, the code generator will assert as it expects to have accurate last
+ // use information. To avoid these asserts, set the GTF_VAR_DEATH bit here.
+ if (tree != nullptr)
+ {
+ tree->gtFlags |= GTF_VAR_DEATH;
+ }
}
- else
-#endif // DEBUG
+ else if (!currentRefPosition->lastUse)
{
- JITDUMP("last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
- currentRefPosition->lastUse = true;
+ JITDUMP("missing expected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
+ foundDiff = true;
}
VarSetOps::AddElemD(compiler, temp, varIndex);
}
- else
+ else if (currentRefPosition->lastUse)
{
- currentRefPosition->lastUse = false;
- if (tree != nullptr)
- {
- tree->gtFlags &= ~GTF_VAR_DEATH;
- }
+ JITDUMP("unexpected last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
+ foundDiff = true;
+ }
+ else if (extendLifetimes() && tree != nullptr)
+ {
+ // NOTE: see the comment above re: the extendLifetimes hack.
+ tree->gtFlags &= ~GTF_VAR_DEATH;
}
if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
@@ -2566,15 +2581,14 @@ void LinearScan::setLastUses(BasicBlock* block)
VarSetOps::RemoveElemD(compiler, temp, varIndex);
}
}
+
assert(currentRefPosition != refPositions.rend());
++currentRefPosition;
}
-#ifdef DEBUG
VARSET_TP VARSET_INIT(compiler, temp2, block->bbLiveIn);
VarSetOps::DiffD(compiler, temp2, temp);
VarSetOps::DiffD(compiler, temp, block->bbLiveIn);
- bool foundDiff = false;
{
VARSET_ITER_INIT(compiler, iter, temp, varIndex);
@@ -2603,8 +2617,8 @@ void LinearScan::setLastUses(BasicBlock* block)
}
assert(!foundDiff);
-#endif // DEBUG
}
+#endif // DEBUG
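
A minimal standalone sketch (plain C++, not JIT code; the types are simplified stand-ins) of the DEBUG-only verification above: walking a block's references backwards against its live-out set, the first reference encountered for a variable (its last use in execution order) must carry the lastUse bit, and no earlier reference may.

    #include <cassert>
    #include <set>
    #include <vector>

    struct RefPos { int varNum; bool lastUse; };

    static bool CheckLastUses(const std::vector<RefPos>& refs, std::set<int> liveOut)
    {
        bool foundDiff = false;
        for (auto it = refs.rbegin(); it != refs.rend(); ++it)   // walk the block backwards
        {
            bool expectLastUse = (liveOut.count(it->varNum) == 0);
            if (expectLastUse != it->lastUse)
                foundDiff = true;
            liveOut.insert(it->varNum);   // any earlier reference is not a last use
        }
        return !foundDiff;
    }

    int main()
    {
        // V0 dies in the block (only its final reference is a last use); V1 is live-out.
        std::vector<RefPos> refs = {{0, false}, {1, false}, {0, true}};
        assert(CheckLastUses(refs, /* liveOut */ {1}));

        refs[0].lastUse = true;                         // an unexpected extra last use
        assert(!CheckLastUses(refs, /* liveOut */ {1}));
        return 0;
    }
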
void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
{
@@ -2758,6 +2772,8 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
needFloatTmpForFPCall = true;
}
}
+#endif // _TARGET_X86_
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
if (tree->IsHelperCall())
{
GenTreeCall* call = tree->AsCall();
@@ -2765,7 +2781,7 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
killMask = compiler->compHelperCallKillSet(helpFunc);
}
else
-#endif // _TARGET_X86_
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
{
// if there is no FP used, we can ignore the FP kills
if (compiler->compFloatingPointUsed)
@@ -2782,9 +2798,6 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
{
killMask = RBM_CALLEE_TRASH_NOGC;
-#if !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
- killMask |= (RBM_ARG_0 | RBM_ARG_1);
-#endif // !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
}
break;
@@ -3030,7 +3043,6 @@ void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
{
RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask,
0 DEBUG_ARG(minRegCandidateCount));
- newest->lastUse = true;
if (tree->gtLsraInfo.isInternalRegDelayFree)
{
@@ -3549,8 +3561,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
}
RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeUse, tree, candidates);
pos->isLocalDefUse = true;
- bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
- pos->lastUse = isLastUse;
pos->setAllocateIfProfitable(tree->IsRegOptional());
DBEXEC(VERBOSE, pos->dump());
return;
@@ -3566,6 +3576,39 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
}
#endif // DEBUG
+ const bool isContainedNode = !info.isLocalDefUse && consume == 0 && produce == 0 && tree->canBeContained();
+ if (isContainedNode)
+ {
+ assert(info.internalIntCount == 0);
+ assert(info.internalFloatCount == 0);
+
+ // Contained nodes map to the concatenated lists of their operands.
+ LocationInfoList locationInfoList;
+ for (GenTree* op : tree->Operands())
+ {
+ if (!op->gtLsraInfo.definesAnyRegisters)
+ {
+ assert(ComputeOperandDstCount(op) == 0);
+ continue;
+ }
+
+ LocationInfoList operandList;
+ bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
+ assert(removed);
+
+ locationInfoList.Append(operandList);
+ }
+
+ if (!locationInfoList.IsEmpty())
+ {
+ bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
+ assert(added);
+ tree->gtLsraInfo.definesAnyRegisters = true;
+ }
+
+ return;
+ }
+
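
A minimal standalone sketch (plain C++, not JIT code; the map and list types are simplified stand-ins for operandToLocationInfoMap and LocationInfoList) of the contained-node handling above: a contained node defines no registers of its own, so it forwards the concatenated location lists of its register-defining operands to its consumer.

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    using LocationInfoList = std::vector<int>;   // stand-in: each entry names a defined register

    int main()
    {
        std::map<std::string, LocationInfoList> operandToLocationInfoMap = {{"op1", {1}}, {"op2", {2, 3}}};

        LocationInfoList locationInfoList;           // list built for the contained node
        for (const char* op : {"op1", "op2"})
        {
            auto it = operandToLocationInfoMap.find(op);
            locationInfoList.insert(locationInfoList.end(), it->second.begin(), it->second.end());
            operandToLocationInfoMap.erase(it);      // TryRemove: the entries move up to the parent
        }
        operandToLocationInfoMap["containedNode"] = locationInfoList;

        assert((operandToLocationInfoMap["containedNode"] == LocationInfoList{1, 2, 3}));
        return 0;
    }
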
// Handle the case of local variable assignment
Interval* varDefInterval = nullptr;
RefType defRefType = RefTypeDef;
@@ -3851,31 +3894,28 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
}
#endif // FEATURE_SIMD
- bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
if (useNode->gtLsraInfo.isTgtPref)
{
prefSrcInterval = i;
}
- bool regOptionalAtUse = useNode->IsRegOptional();
- bool isLastUse = true;
- if (isCandidateLocalRef(useNode))
+ regMaskTP fixedAssignment = fixedCandidateMask(type, candidates);
+ if (fixedAssignment != RBM_NONE)
{
- isLastUse = ((useNode->gtFlags & GTF_VAR_DEATH) != 0);
+ candidates = fixedAssignment;
}
- else
+
+ const bool regOptionalAtUse = useNode->IsRegOptional();
+ const bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
+
+ assert(isCandidateLocalRef(useNode) == i->isLocalVar);
+ if (!i->isLocalVar)
{
// For non-localVar uses we record nothing,
// as nothing needs to be written back to the tree.
useNode = nullptr;
}
- regMaskTP fixedAssignment = fixedCandidateMask(type, candidates);
- if (fixedAssignment != RBM_NONE)
- {
- candidates = fixedAssignment;
- }
-
#ifdef DEBUG
// If delayRegFree, then Use will interfere with the destination of
// the consuming node. Therefore, we also need add the kill set of
@@ -3936,11 +3976,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
pos->delayRegFree = true;
}
- if (isLastUse)
- {
- pos->lastUse = true;
- }
-
if (regOptionalAtUse)
{
pos->setAllocateIfProfitable(1);
@@ -3973,8 +4008,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
#if defined(_TARGET_AMD64_)
// Multi-reg call node is the only node that could produce multi-reg value
assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
-#elif defined(_TARGET_ARM_)
- assert(!varTypeIsMultiReg(tree->TypeGet()));
#endif // _TARGET_xxx_
// Add kill positions before adding def positions
@@ -4074,27 +4107,6 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree,
buildUpperVectorRestoreRefPositions(tree, defLocation, liveLargeVectors);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- bool isContainedNode = !noAdd && consume == 0 && produce == 0 &&
- (tree->OperIsFieldListHead() || ((tree->TypeGet() != TYP_VOID) && !tree->OperIsStore()));
- if (isContainedNode)
- {
- // Contained nodes map to the concatenated lists of their operands.
- for (GenTree* op : tree->Operands())
- {
- if (!op->gtLsraInfo.definesAnyRegisters)
- {
- assert(ComputeOperandDstCount(op) == 0);
- continue;
- }
-
- LocationInfoList operandList;
- bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
- assert(removed);
-
- locationInfoList.Append(operandList);
- }
- }
-
if (!locationInfoList.IsEmpty())
{
bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
@@ -4716,15 +4728,27 @@ void LinearScan::buildIntervals()
JITDUMP("\n");
}
- // Identify the last uses of each variable, except in the case of MinOpts, where all vars
- // are kept live everywhere.
-
- if (!compiler->opts.MinOpts())
+ // Clear the "last use" flag on any vars that are live-out from this block.
{
- setLastUses(block);
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* const varDsc = &compiler->lvaTable[varNum];
+ if (isCandidateVar(varDsc))
+ {
+ RefPosition* const lastRP = getIntervalForLocalVar(varNum)->lastRefPosition;
+ if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum))
+ {
+ lastRP->lastUse = false;
+ }
+ }
+ }
}
#ifdef DEBUG
+ checkLastUses(block);
+
if (VERBOSE)
{
printf("use: ");
@@ -7669,6 +7693,22 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosi
interval->recentRefPosition = currentRefPosition;
LclVarDsc* varDsc = interval->getLocalVar(compiler);
+ // NOTE: we set the GTF_VAR_DEATH flag here unless we are extending lifetimes, in which case we write
+ // this bit in checkLastUses. This is a bit of a hack, but is necessary because codegen requires
+ // accurate last use info that is not reflected in the lastUse bit on ref positions when we are extending
+ // lifetimes. See also the comments in checkLastUses.
+ if ((treeNode != nullptr) && !extendLifetimes())
+ {
+ if (currentRefPosition->lastUse)
+ {
+ treeNode->gtFlags |= GTF_VAR_DEATH;
+ }
+ else
+ {
+ treeNode->gtFlags &= ~GTF_VAR_DEATH;
+ }
+ }
+
if (currentRefPosition->registerAssignment == RBM_NONE)
{
assert(!currentRefPosition->RequiresRegister());
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index c8a3fb4e24..b6f83792a7 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -681,7 +681,9 @@ private:
void buildPhysRegRecords();
- void setLastUses(BasicBlock* block);
+#ifdef DEBUG
+ void checkLastUses(BasicBlock* block);
+#endif // DEBUG
void setFrameType();
@@ -744,6 +746,9 @@ private:
TreeNodeInfo& info = tree->gtLsraInfo;
info.srcCount = 0;
info.dstCount = 0;
+
+ info.internalIntCount = 0;
+ info.internalFloatCount = 0;
}
inline bool isLocalDefUse(GenTree* tree)
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index 57f0096b35..e35e57908a 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -30,251 +30,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
-// TreeNodeInfoInitStoreLoc: Lower a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Handling of contained immediates and widening operations of unsigneds.
-//
-void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
-{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
- GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
- CheckImmedAndMakeContained(storeLoc, op1);
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCmp: Lower a GT comparison node.
-//
-// Arguments:
-// tree - the node to lower
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitGCWriteBarrier: GC lowering helper.
-//
-// Arguments:
-// tree - the node to lower
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
-{
- GenTreePtr dst = tree;
- GenTreePtr addr = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtOp.gtOp2;
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- short leaSrcCount = 0;
- if (lea->Base() != nullptr)
- {
- leaSrcCount++;
- }
- if (lea->Index() != nullptr)
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
-#if NOGC_WRITE_BARRIERS
- NYI_ARM("NOGC_WRITE_BARRIERS");
-#else
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
-#endif // NOGC_WRITE_BARRIERS
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitIndir: Specify register requirements for address expression
-// of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
-//
-void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
-{
- assert(indirTree->OperIsIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned cns = 0;
- unsigned mul;
- bool rev;
- bool modifiedSources = false;
-
- if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- GenTreeAddrMode* lea = addr->AsAddrMode();
- base = lea->Base();
- index = lea->Index();
- cns = lea->gtOffset;
-
- m_lsra->clearOperandCounts(addr);
- // The srcCount is decremented because addr is now "contained",
- // then we account for the base and index below, if they are non-null.
- info->srcCount--;
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- GenTreePtr arrLength = nullptr;
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = (base == nullptr);
- bool foundIndex = (index == nullptr);
- GenTreePtr nextChild = nullptr;
- for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTreePtr op1 = child->gtOp.gtOp1;
- GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
- }
- assert(foundBase && foundIndex);
- info->srcCount--; // it gets incremented below.
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
- // We also consume the target register to which the address is
- // computed
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
- }
- else
- {
- // it is nothing but a plain indir
- info->srcCount--; // base gets added in below
- base = addr;
- }
-
- if (base != nullptr)
- {
- info->srcCount++;
- }
-
- if (index != nullptr && !modifiedSources)
- {
- info->srcCount++;
- }
-}
-
-//------------------------------------------------------------------------
// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
//
// Arguments:
@@ -289,375 +44,158 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
LinearScan* l = m_lsra;
Compiler* compiler = comp;
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
- if (varTypeIsStruct(tree))
- {
- NYI_ARM("struct return");
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
+ if (tree->TypeGet() == TYP_LONG)
{
+ GenTree* op1 = tree->gtGetOp1();
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
info->dstCount = 0;
}
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- info->srcCount++;
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (call->IsFastTailCall())
- {
- NYI_ARM("tail call");
- }
- }
- else
- {
- info->internalIntCount = 1;
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
- info->setDstCandidates(l, RBM_FLOATRET);
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
else
{
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
- // First, count reg args
- bool callHasFloatRegArgs = false;
-
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
- continue;
- }
-
- var_types argType = argNode->TypeGet();
- bool argIsFloat = varTypeIsFloating(argType);
- callHasFloatRegArgs |= argIsFloat;
-
- regNumber argReg = curArgTabEntry->regNum;
- // We will setup argMask to the set of all registers that compose this argument
- regMaskTP argMask = 0;
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
- argNode = argNode->gtEffectiveVal();
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
- // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
- if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
+ if (varTypeIsStruct(tree))
{
- GenTreePtr actualArgNode = argNode;
- unsigned originalSize = 0;
-
- if (argNode->gtOper == GT_FIELD_LIST)
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() == GT_LCL_VAR)
{
- // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
-
- // Initailize the first register and the first regmask in our list
- regNumber targetReg = argReg;
- regMaskTP targetMask = genRegMask(targetReg);
- unsigned iterationNum = 0;
- originalSize = 0;
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
{
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
-
- originalSize += REGSIZE_BYTES; // 8 bytes
-
- // Record the register requirements for the GT_PUTARG_REG node
- putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // We consume one source for each item in this list
- info->srcCount++;
- iterationNum++;
-
- // Update targetReg and targetMask for the next putarg_reg (if any)
- targetReg = genRegArgNext(targetReg);
- targetMask = genRegMask(targetReg);
+ MakeSrcContained(tree, op1);
}
}
else
{
-#ifdef DEBUG
- compiler->gtDispTreeRange(BlockRange(), argNode);
-#endif
- noway_assert(!"Unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
- regNumber curReg = argReg;
- regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
- unsigned remainingSlots = slots;
-
- while (remainingSlots > 0)
- {
- argMask |= genRegMask(curReg);
- remainingSlots--;
-
- if (curReg == lastReg)
- break;
+ noway_assert(op1->IsMultiRegCall());
- curReg = genRegArgNext(curReg);
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
}
-
- // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
- noway_assert(remainingSlots == 0);
- argNode->gtLsraInfo.internalIntCount = 0;
}
- else // A scalar argument (not a struct)
+ else
{
- // We consume one source
- info->srcCount++;
-
- argMask |= genRegMask(argReg);
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- if (argNode->gtOper == GT_PUTARG_REG)
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
{
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
}
}
- }
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
-
- // Skip arguments that have been moved to the Late Arg list
- if (!(args->gtFlags & GTF_LATE_ARG))
+ if (useCandidates != RBM_NONE)
{
- if (arg->gtOper == GT_PUTARG_STK)
- {
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- assert(curArgTabEntry);
-
- assert(curArgTabEntry->regNum == REG_STK);
-
- TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
- }
- else
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- argInfo->dstCount = 0;
- }
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
}
- args = args->gtOp.gtOp2;
- }
-
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- NYI_ARM("float reg varargs");
}
}
-//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
-//
-// Arguments:
-// argNode - a GT_PUTARG_STK node
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Set the child node(s) to be contained when we have a multireg arg
-//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
{
- assert(argNode->gtOper == GT_PUTARG_STK);
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+ info->srcCount = 1;
+ info->dstCount = 1;
- // Initialize 'argNode' as not contained, as this is both the default case
- // and how MakeSrcContained expects to find things setup.
+ // Need a variable number of temp regs (see genLclHeap() in codegenarm.cpp):
+ // Here '-' means don't care.
//
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
+ // Size? Init Memory? # temp regs
+ // 0 - 0
+ // const and <=4 ptr words - hasPspSym ? 1 : 0
+ // const and <PageSize No hasPspSym ? 1 : 0
+ // >4 ptr words Yes hasPspSym ? 2 : 1
+ // Non-const Yes hasPspSym ? 2 : 1
+ // Non-const No hasPspSym ? 2 : 1
+
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
- // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
- if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
{
- // We will use store instructions that each write a register sized value
+ MakeSrcContained(tree, size);
- if (putArgChild->OperGet() == GT_FIELD_LIST)
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+ if (sizeVal == 0)
{
- // We consume all of the items in the GT_FIELD_LIST
- argNode->gtLsraInfo.srcCount = info->numSlots;
+ info->internalIntCount = 0;
}
else
{
- // We could use a ldp/stp sequence so we need two internal registers
- argNode->gtLsraInfo.internalIntCount = 2;
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
- if (putArgChild->OperGet() == GT_OBJ)
+            // For small allocations we emit up to 4 store instructions
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
{
- GenTreePtr objChild = putArgChild->gtOp.gtOp1;
- if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
{
- // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
- // as one contained operation
- //
- MakeSrcContained(putArgChild, objChild);
+ info->internalIntCount = 0;
}
+ else
+ {
+ // target (regCnt) + tmp + [psp]
+ info->internalIntCount = 1;
+ info->isInternalRegDelayFree = true;
+ }
+ }
+ else
+ {
+ // target (regCnt) + tmp + [psp]
+ info->internalIntCount = 1;
+ info->isInternalRegDelayFree = true;
}
-            // We will generate all of the code for the GT_PUTARG_STK and its child node
- // as one contained operation
- //
- MakeSrcContained(argNode, putArgChild);
+ if (hasPspSym)
+ {
+ info->internalIntCount++;
+ }
}
}
else
{
- // We must not have a multi-reg struct
- assert(info->numSlots == 1);
+ // target (regCnt) + tmp + [psp]
+ info->internalIntCount = hasPspSym ? 2 : 1;
+ info->isInternalRegDelayFree = true;
}
}
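
The comment table above is the whole contract for GT_LCLHEAP temporaries. A minimal standalone sketch of the constant-size decision, assuming an 8-byte STACK_ALIGN and with the helper name and parameters invented for illustration (the real code reads hasPspSym, compInitMem and the page size from the compiler object):

#include <cstddef>

// Sketch: internal int registers reserved for a constant-size localloc.
// Mirrors TreeNodeInfoInitLclHeap above; thresholds follow the comment table.
unsigned LclHeapInternalRegs(size_t sizeVal, bool initMem, bool hasPspSym, size_t pageSize)
{
    if (sizeVal == 0)
        return 0;

    const size_t stackAlign = 8;                              // assumed STACK_ALIGN
    sizeVal = (sizeVal + stackAlign - 1) & ~(stackAlign - 1); // AlignUp(sizeVal, STACK_ALIGN)
    size_t alignedSlots = sizeVal / stackAlign;               // sizeVal >> STACK_ALIGN_SHIFT

    unsigned regs;
    if (alignedSlots <= 4)
        regs = 0;                            // emitted as a handful of stores
    else if (!initMem && sizeVal < pageSize)
        regs = 0;                            // single SP adjustment, no probing loop
    else
        regs = 1;                            // loop/probing path needs a delay-free temp

    return regs + (hasPspSym ? 1 : 0);       // one more when a PSPSym is present (funclet frames)
}
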
@@ -689,6 +227,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
JITDUMP("TreeNodeInfoInit for: ");
DISPNODE(tree);
+ NYI_IF(tree->TypeGet() == TYP_DOUBLE, "lowering double");
+
switch (tree->OperGet())
{
GenTree* op1;
@@ -696,7 +236,14 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_STORE_LCL_FLD:
case GT_STORE_LCL_VAR:
- info->srcCount = 1;
+ if (tree->gtGetOp1()->OperGet() == GT_LONG)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
info->dstCount = 0;
LowerStoreLoc(tree->AsLclVarCommon());
TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
@@ -767,9 +314,33 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
#endif // DEBUG
- if (tree->gtOverflow())
+ if (varTypeIsLong(castOpType))
{
- NYI_ARM("overflow checks");
+ noway_assert(castOp->OperGet() == GT_LONG);
+ info->srcCount = 2;
+ }
+
+ CastInfo castInfo;
+
+ // Get information about the cast.
+ getCastDescription(tree, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ var_types srcType = castOp->TypeGet();
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+                // If either the max or the min comparison value cannot be encoded
+                // as an immediate, then we will need to reserve a temporary register.
+
+ bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, INS_FLAGS_DONT_CARE);
+ bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, INS_FLAGS_DONT_CARE);
+
+ if (!canStoreMaxValue || !canStoreMinValue)
+ {
+ info->internalIntCount = 1;
+ }
}
}
break;
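
The temp register above is only needed when the cast's bounds do not fit a compare immediate; on ARM32 that encoding is roughly an 8-bit value rotated by an even amount, so a bound like 0xFF encodes while something like INT32_MAX does not. A hedged sketch of the decision, with validImmForCmp standing in for emitter::emitIns_valid_imm_for_cmp and the helper name invented here:

// Sketch: does an overflow-checked cast need an internal register?
// The generated check compares the source value against typeMin/typeMax;
// any bound that cannot be encoded as a cmp immediate must first be
// materialized into a temporary register.
bool CastNeedsInternalReg(long long typeMin, long long typeMax, bool (*validImmForCmp)(long long))
{
    bool canEncodeMax = validImmForCmp(typeMax);
    bool canEncodeMin = validImmForCmp(typeMin);
    return !canEncodeMax || !canEncodeMin;
}
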
@@ -799,9 +370,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
break;
case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
+ info->srcCount = 2;
+ info->dstCount = 0;
break;
case GT_ASG:
@@ -812,6 +382,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->dstCount = 0;
break;
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
case GT_ADD:
case GT_SUB:
if (varTypeIsFloating(tree->TypeGet()))
@@ -840,6 +414,13 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
break;
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
case GT_MUL:
if (tree->gtOverflow())
{
@@ -867,6 +448,21 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
info->dstCount = 0;
break;
+ case GT_LONG:
+ if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ // An unused GT_LONG node needs to consume its sources.
+ info->srcCount = 2;
+ }
+ else
+ {
+ // Passthrough
+ info->srcCount = 0;
+ }
+
+ info->dstCount = 0;
+ break;
+
case GT_CNS_DBL:
info->srcCount = 0;
info->dstCount = 1;
@@ -907,6 +503,54 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
break;
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+            // We need one internal register when generating code for GT_ARR_INDEX; however, the
+            // register allocator may give us the same register that it gives us for the 'dst',
+            // so as a workaround we ask for two internal registers.
+ //
+ info->internalIntCount = 2;
+
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
case GT_LEA:
{
GenTreeAddrMode* lea = tree->AsAddrMode();
@@ -928,13 +572,17 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
}
info->dstCount = 1;
+ // On ARM we may need a single internal register
+ // (when both conditions are true then we still only need a single internal register)
if ((index != nullptr) && (cns != 0))
{
- NYI_ARM("GT_LEA: index and cns are not nil");
+ // ARM does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
}
else if (!emitter::emitIns_valid_imm_for_add(cns, INS_FLAGS_DONT_CARE))
{
- NYI_ARM("GT_LEA: invalid imm");
+ // This offset can't be contained in the add instruction, so we need an internal register
+ info->internalIntCount = 1;
}
}
break;
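
Both internalIntCount cases in the GT_LEA handling above stem from the same ARM limitation: a memory operand can be [base, index] or [base, #imm] with a limited immediate range, but not [base, index, #imm]. A small sketch of the decision, with validImmForAdd standing in for emitter::emitIns_valid_imm_for_add and the helper name invented here:

// Sketch: internal registers needed to form a GT_LEA address on ARM.
// One temp suffices even when both conditions hold, because the add that
// folds in the offset (or the index) can target that same temp register.
unsigned LeaInternalRegs(bool hasIndex, int offset, bool (*validImmForAdd)(int))
{
    if (hasIndex && (offset != 0))
        return 1; // [base, index, #imm] is not a legal ARM addressing mode
    if (!validImmForAdd(offset))
        return 1; // offset cannot ride along in the add instruction
    return 0;
}
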
@@ -953,19 +601,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- {
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
- if (shiftBy->IsCnsIntOrI())
- {
- l->clearDstCount(shiftBy);
- info->srcCount--;
- }
- }
- break;
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+ TreeNodeInfoInitShiftRotate(tree);
+ break;
case GT_EQ:
case GT_NE:
@@ -980,6 +619,17 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
TreeNodeInfoInitCall(tree->AsCall());
break;
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ TreeNodeInfoInitLclHeap(tree);
+ break;
+
case GT_STOREIND:
{
info->srcCount = 2;
@@ -1030,17 +680,27 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
default:
#ifdef DEBUG
- JitTls::GetCompiler()->gtDispTree(tree);
-#endif
+ char message[256];
+ _snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s",
+ GenTree::NodeName(tree->OperGet()));
+ NYIRAW(message);
+#else
NYI_ARM("TreeNodeInfoInit default case");
+#endif
case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
case GT_LCL_VAR:
case GT_LCL_VAR_ADDR:
+ case GT_PHYSREG:
case GT_CLS_VAR_ADDR:
case GT_IL_OFFSET:
case GT_CNS_INT:
case GT_PUTARG_REG:
case GT_PUTARG_STK:
+ case GT_LABEL:
+ case GT_PINVOKE_PROLOG:
+ case GT_JCC:
+ case GT_MEMORYBARRIER:
info->dstCount = tree->IsValue() ? 1 : 0;
if (kind & (GTK_CONST | GTK_LEAF))
{
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 0db30e1811..3b2d465495 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -29,43 +29,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
//------------------------------------------------------------------------
-// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
-//
-// Arguments:
-// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
-//
-// Notes:
-// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Handling of contained immediates.
-
-void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
-{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
- GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
- CheckImmedAndMakeContained(storeLoc, op1);
-}
-
-//------------------------------------------------------------------------
// TreeNodeInfoInit: Set the register requirements for RA.
//
// Notes:
@@ -435,19 +398,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
- {
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
- if (shiftBy->IsCnsIntOrI())
- {
- l->clearDstCount(shiftBy);
- info->srcCount--;
- }
- }
- break;
+ TreeNodeInfoInitShiftRotate(tree);
+ break;
case GT_EQ:
case GT_NE:
@@ -847,502 +799,6 @@ void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
}
}
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- info->srcCount++;
-
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (call->IsFastTailCall())
- {
- // Fast tail call - make sure that call target is always computed in IP0
- // so that epilog sequence can generate "br xip0" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
- }
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
- info->setDstCandidates(l, RBM_FLOATRET);
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
- // First, count reg args
- bool callHasFloatRegArgs = false;
-
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
- continue;
- }
-
- var_types argType = argNode->TypeGet();
- bool argIsFloat = varTypeIsFloating(argType);
- callHasFloatRegArgs |= argIsFloat;
-
- regNumber argReg = curArgTabEntry->regNum;
- // We will setup argMask to the set of all registers that compose this argument
- regMaskTP argMask = 0;
-
- argNode = argNode->gtEffectiveVal();
-
- // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
- if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
- {
- GenTreePtr actualArgNode = argNode;
- unsigned originalSize = 0;
-
- if (argNode->gtOper == GT_FIELD_LIST)
- {
- // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
-
-                // Initialize the first register and the first regmask in our list
- regNumber targetReg = argReg;
- regMaskTP targetMask = genRegMask(targetReg);
- unsigned iterationNum = 0;
- originalSize = 0;
-
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
-
- originalSize += REGSIZE_BYTES; // 8 bytes
-
- // Record the register requirements for the GT_PUTARG_REG node
- putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // We consume one source for each item in this list
- info->srcCount++;
- iterationNum++;
-
- // Update targetReg and targetMask for the next putarg_reg (if any)
- targetReg = genRegArgNext(targetReg);
- targetMask = genRegMask(targetReg);
- }
- }
- else
- {
-#ifdef DEBUG
- compiler->gtDispTreeRange(BlockRange(), argNode);
-#endif
- noway_assert(!"Unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
- regNumber curReg = argReg;
- regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
- unsigned remainingSlots = slots;
-
- while (remainingSlots > 0)
- {
- argMask |= genRegMask(curReg);
- remainingSlots--;
-
- if (curReg == lastReg)
- break;
-
- curReg = genRegArgNext(curReg);
- }
-
- // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
- noway_assert(remainingSlots == 0);
- argNode->gtLsraInfo.internalIntCount = 0;
- }
- else // A scalar argument (not a struct)
- {
- // We consume one source
- info->srcCount++;
-
- argMask |= genRegMask(argReg);
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
- }
- }
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
-
- // Skip arguments that have been moved to the Late Arg list
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- if (arg->gtOper == GT_PUTARG_STK)
- {
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- assert(curArgTabEntry);
-
- assert(curArgTabEntry->regNum == REG_STK);
-
- TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
- }
- else
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- argInfo->dstCount = 0;
- }
- }
- args = args->gtOp.gtOp2;
- }
-
- // If it is a fast tail call, it is already preferenced to use IP0.
-    // Therefore, there is no need to set src candidates on the call target again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Arm64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
-//
-// Arguments:
-// argNode - a GT_PUTARG_STK node
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Set the child node(s) to be contained when we have a multireg arg
-//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
-{
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // Initialize 'argNode' as not contained, as this is both the default case
- // and how MakeSrcContained expects to find things setup.
- //
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
- // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
- if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
- {
- // We will use store instructions that each write a register sized value
-
- if (putArgChild->OperGet() == GT_FIELD_LIST)
- {
- // We consume all of the items in the GT_FIELD_LIST
- argNode->gtLsraInfo.srcCount = info->numSlots;
- }
- else
- {
- // We could use a ldp/stp sequence so we need two internal registers
- argNode->gtLsraInfo.internalIntCount = 2;
-
- if (putArgChild->OperGet() == GT_OBJ)
- {
- GenTreePtr objChild = putArgChild->gtOp.gtOp1;
- if (objChild->OperGet() == GT_LCL_VAR_ADDR)
- {
- // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
- // as one contained operation
- //
- MakeSrcContained(putArgChild, objChild);
- }
- }
-
-            // We will generate all of the code for the GT_PUTARG_STK and its child node
- // as one contained operation
- //
- MakeSrcContained(argNode, putArgChild);
- }
- }
- else
- {
- // We must not have a multi-reg struct
- assert(info->numSlots == 1);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
-//
-// Arguments:
-// blkNode - The block store node of interest
-//
-// Return Value:
-// None.
-//
-// Notes:
-
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
-{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- // Sources are dest address and initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- GenTreePtr srcAddrOrFill = nullptr;
- bool isInitBlk = blkNode->OperIsInitBlkOp();
-
- if (!isInitBlk)
- {
- // CopyObj or CopyBlk
- if (source->gtOper == GT_IND)
- {
- srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
- }
- }
-
- if (isInitBlk)
- {
- GenTreePtr initVal = source;
- if (initVal->OperIsInitVal())
- {
- initVal = initVal->gtGetOp1();
- }
- srcAddrOrFill = initVal;
-
-#if 0
- if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
- {
- // TODO-ARM64-CQ: Currently we generate a helper call for every
- // initblk we encounter. Later on we should implement loop unrolling
- // code sequences to improve CQ.
- // For reference see the code in lsraxarch.cpp.
- }
- else
-#endif // 0
- {
- assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
- // The helper follows the regular ABI.
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
- blkNode->gtLsraInfo.internalIntCount = 1;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
- sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
- }
- }
- else
- {
- // CopyObj or CopyBlk
- // Sources are src and dest and size if not constant.
-
- if (blkNode->OperGet() == GT_STORE_OBJ)
- {
- // CopyObj
-
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- blkNode->gtLsraInfo.internalIntCount = 1;
-
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
- }
- }
- else
- {
- // CopyBlk
- short internalIntCount = 0;
- regMaskTP internalIntCandidates = RBM_NONE;
-
-#if 0
- if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
- {
- // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
- // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
- // we should unroll the loop to improve CQ.
- // For reference see the code in lsraxarch.cpp.
- }
- else
-#endif // 0
- {
- assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- // The srcAddr goes in arg1.
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
- }
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- internalIntCandidates |= RBM_ARG_2;
- internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
- }
- if (internalIntCount != 0)
- {
- blkNode->gtLsraInfo.internalIntCount = internalIntCount;
- blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
- }
- }
- }
-}
-
#ifdef FEATURE_SIMD
//------------------------------------------------------------------------
// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
@@ -1544,223 +1000,6 @@ void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
}
#endif // FEATURE_SIMD
-void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
-{
- GenTreePtr dst = tree;
- GenTreePtr addr = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtOp.gtOp2;
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- short leaSrcCount = 0;
- if (lea->Base() != nullptr)
- {
- leaSrcCount++;
- }
- if (lea->Index() != nullptr)
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
-#if NOGC_WRITE_BARRIERS
- // For the NOGC JIT Helper calls
- //
- // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
- // the 'src' goes into x15 (REG_WRITE_BARRIER)
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
-#else
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
-#endif // NOGC_WRITE_BARRIERS
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
-
-//-----------------------------------------------------------------------------------------
-// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND or GT_STOREIND gentree node
-//
-void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
-{
- assert(indirTree->OperIsIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned cns = 0;
- unsigned mul;
- bool rev;
- bool modifiedSources = false;
-
- if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- GenTreeAddrMode* lea = addr->AsAddrMode();
- base = lea->Base();
- index = lea->Index();
- cns = lea->gtOffset;
-
- m_lsra->clearOperandCounts(addr);
- // The srcCount is decremented because addr is now "contained",
- // then we account for the base and index below, if they are non-null.
- info->srcCount--;
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- GenTreePtr arrLength = nullptr;
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = (base == nullptr);
- bool foundIndex = (index == nullptr);
- GenTreePtr nextChild = nullptr;
- for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTreePtr op1 = child->gtOp.gtOp1;
- GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
- }
- assert(foundBase && foundIndex);
- info->srcCount--; // it gets incremented below.
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
- // We also consume the target register to which the address is
- // computed
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
- }
- else
- {
- // it is nothing but a plain indir
- info->srcCount--; // base gets added in below
- base = addr;
- }
-
- if (base != nullptr)
- {
- info->srcCount++;
- }
-
- if (index != nullptr && !modifiedSources)
- {
- info->srcCount++;
- }
-
- // On ARM64 we may need a single internal register
- // (when both conditions are true then we still only need a single internal register)
- if ((index != nullptr) && (cns != 0))
- {
- // ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
- }
- else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
- {
- // This offset can't be contained in the ldr/str instruction, so we need an internal register
- info->internalIntCount = 1;
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCmp: Set the register requirements for a compare.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
-}
-
#endif // _TARGET_ARM64_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
new file mode 100644
index 0000000000..7d999d880f
--- /dev/null
+++ b/src/jit/lsraarmarch.cpp
@@ -0,0 +1,868 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register Requirements for ARM and ARM64 common code XX
+XX XX
+XX This encapsulates common logic for setting register requirements for XX
+XX the ARM and ARM64 architectures. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARMARCH_ // This file is ONLY used for ARM and ARM64 architectures
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Handling of contained immediates.
+//
+void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+}
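
For the multi-reg branch above, srcCount is the number of return registers rather than 1, and the call is constrained to produce each piece in an allowed register for its type. A tiny illustration, assuming the AAPCS case where a 64-bit value comes back in r0/r1 on ARM32; the helper name and parameters are invented:

// Sketch: sources consumed by a store-local of a call result.
// e.g. 'long x = F();' on ARM32: F() returns in r0/r1, so the store consumes two.
unsigned StoreLocSrcCount(bool isMultiRegCall, unsigned returnRegCount)
{
    return isMultiRegCall ? returnRegCount : 1;
}
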
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCmp: Lower a GT comparison node.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+#ifdef _TARGET_ARM_
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ // Long compares will consume GT_LONG nodes, each of which produces two results.
+ // Thus for each long operand there will be an additional source.
+ // TODO-ARM-CQ: Mark hiOp2 and loOp2 as contained if it is a constant.
+ if (varTypeIsLong(op1Type))
+ {
+ info->srcCount++;
+ }
+ if (varTypeIsLong(op2Type))
+ {
+ info->srcCount++;
+ }
+
+#endif // _TARGET_ARM_
+
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
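
The srcCount adjustment above exists because on ARM32 each long operand is a GT_LONG that produces two registers, so a long-vs-long compare ends up consuming four sources. A worked sketch of what that compare reduces to and how the count is formed (names invented for illustration):

// Sketch: a 64-bit equality test on a 32-bit target reads both halves of each
// operand, e.g. eq = (aLo == bLo) && (aHi == bHi), hence one extra source per
// long operand on top of the usual two.
unsigned CompareSrcCount(bool op1IsLong, bool op2IsLong)
{
    unsigned srcCount = 2;
    if (op1IsLong)
        srcCount++;
    if (op2IsLong)
        srcCount++;
    return srcCount;
}
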
+
+void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ NYI_ARM("NOGC_WRITE_BARRIERS");
+
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitIndir: Specify register requirements for address expression
+// of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
+//
+void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+
+ // On ARM we may need a single internal register
+ // (when both conditions are true then we still only need a single internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
+ {
+ // This offset can't be contained in the ldr/str instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+
+#ifdef _TARGET_ARM_
+
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert(source->OperGet() == GT_LONG);
+
+ info->srcCount++;
+
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ sourceLo->gtLsraInfo.isDelayFree = true;
+ }
+ else
+ {
+ GenTreePtr sourceHi = source->gtOp.gtOp2;
+ sourceHi->gtLsraInfo.isDelayFree = true;
+ }
+
+ source->gtLsraInfo.hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
+ }
+
+#endif // _TARGET_ARM_
+}
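
The extra source and the delay-free marks above come from the decomposed 64-bit shifts on ARM32: the high-half result is built from both halves of the GT_LONG, and whichever half is read after the destination is written must not share its register. A worked example of the arithmetic, valid for shift counts strictly between 0 and 32 (pure illustration, not JIT code):

#include <stdint.h>

// lshHi reads BOTH halves (hence the extra source and the delay-free low half),
// while the low half of the result depends on lo alone.
static uint32_t lshLo(uint32_t lo, unsigned n)              { return lo << n; }
static uint32_t lshHi(uint32_t lo, uint32_t hi, unsigned n) { return (hi << n) | (lo >> (32u - n)); }

// Example: for 0 < n < 32,
//   ((uint64_t)hi << 32 | lo) << n == ((uint64_t)lshHi(lo, hi, n) << 32) | lshLo(lo, n)
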
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
+//
+// Arguments:
+// node - The PUTARG_REG node.
+// argReg - The register in which to pass the argument.
+// info - The info for the node's using call.
+// isVarArgs - True if the call uses a varargs calling convention.
+// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register.
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitPutArgReg(
+ GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
+{
+ assert(node != nullptr);
+ assert(node->OperIsPutArgReg());
+ assert(argReg != REG_NA);
+
+ // Each register argument corresponds to one source.
+ info.srcCount++;
+
+ // Set the register requirements for the node.
+ const regMaskTP argMask = genRegMask(argReg);
+ node->gtLsraInfo.setDstCandidates(m_lsra, argMask);
+ node->gtLsraInfo.setSrcCandidates(m_lsra, argMask);
+
+ // To avoid redundant moves, have the argument operand computed in the
+ // register in which the argument is passed to the call.
+ node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));
+
+ *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet());
+}
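
TreeNodeInfoInitPutArgReg above is just bookkeeping: one extra source on the call, and both the PUTARG_REG and its operand pinned to the one argument register so no fix-up mov is needed. A toy version of the mask it pins, with the bit-index parameter standing in for the regNumber/genRegMask machinery:

// Sketch: for F(a, b), the PUTARG_REG for 'a' pins dst and src candidates to r0
// and the one for 'b' to r1, so each argument tree is evaluated straight into
// its argument register.
unsigned PutArgRegCandidateMask(unsigned argRegIndex) // 0 -> r0, 1 -> r1, ...
{
    return 1u << argRegIndex; // single-bit mask used for both dst and src candidates
}
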
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+
+        // In case of a fast tail call implemented as a jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (call->IsFastTailCall())
+ {
+ NYI_ARM("tail call");
+
+#ifdef _TARGET_ARM64_
+ // Fast tail call - make sure that call target is always computed in IP0
+ // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+#endif // _TARGET_ARM64_
+ }
+ }
+#ifdef _TARGET_ARM_
+ else
+ {
+ info->internalIntCount = 1;
+ }
+#endif // _TARGET_ARM_
+
+ RegisterType registerType = call->TypeGet();
+
+// Set destination candidates for return value of the call.
+
+#ifdef _TARGET_ARM_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
+ info->setDstCandidates(l, RBM_PINVOKE_TCB);
+ }
+ else
+#endif // _TARGET_ARM_
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
+ continue;
+ }
+
+ // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ regNumber argReg = curArgTabEntry->regNum;
+ for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
+ {
+ TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, false, &callHasFloatRegArgs);
+
+ // Update argReg for the next putarg_reg (if any)
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, false, &callHasFloatRegArgs);
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ // If it is a fast tail call, it is already preferenced to use IP0.
+    // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ NYI_ARM("float reg varargs");
+
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Arm64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+
+#ifdef _TARGET_ARM_
+
+ if (call->NeedsNullCheck())
+ {
+ info->internalIntCount++;
+ }
+
+#endif // _TARGET_ARM_
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+    // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST)? If so, it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_FIELD_LIST)
+ {
+ // We consume all of the items in the GT_FIELD_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+            // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ if (!isInitBlk)
+ {
+ // CopyObj or CopyBlk
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ }
+
+ if (isInitBlk)
+ {
+ GenTreePtr initVal = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
+
+ if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
+ {
+ // TODO-ARM-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in lsraxarch.cpp.
+ NYI_ARM("initblk loop unrolling is currently not implemented.");
+
+#ifdef _TARGET_ARM64_
+ // No additional temporaries required
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
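+ // A zero fill value can be stored directly from the zero register, so the init value node needs no register of its own and can be contained.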
+ if (fill == 0)
+ {
+ MakeSrcContained(blkNode, source);
+ }
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
+ // The helper follows the regular ABI.
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+ blkNode->gtLsraInfo.internalIntCount = 1;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+ NYI_ARM("GT_STORE_OBJ is needed of write barriers implementation");
+
+#ifdef _TARGET_ARM64_
+
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ blkNode->gtLsraInfo.internalIntCount = 1;
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
+
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ // CopyBlk
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+ if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
+ {
+ // TODO-ARM-CQ: cpblk loop unrolling is currently not implemented.
+ // For a CpBlk with a constant size smaller than CPBLK_UNROLL_LIMIT,
+ // we should unroll the loop to improve CQ.
+ // For reference see the code in lsraxarch.cpp.
+ NYI_ARM("cpblk loop unrolling is currently not implemented.");
+
+#ifdef _TARGET_ARM64_
+
+ internalIntCount = 1;
+ internalIntCandidates = RBM_ALLINT;
+
+ if (size >= 2 * REGSIZE_BYTES)
+ {
+ // Use ldp/stp to reduce code size and improve performance
+ internalIntCount++;
+ }
+
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ // The srcAddr goes in arg1.
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ }
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= RBM_ARG_2;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ if (internalIntCount != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount = internalIntCount;
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ }
+ }
+ }
+}
+
+#endif // _TARGET_ARMARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
index a4da2b7ce6..002e3d803f 100644
--- a/src/jit/lsraxarch.cpp
+++ b/src/jit/lsraxarch.cpp
@@ -1174,6 +1174,55 @@ void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
}
//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgReg: Set the NodeInfo for a PUTARG_REG.
+//
+// Arguments:
+// node - The PUTARG_REG node.
+// argReg - The register in which to pass the argument.
+// info - The info for the node's using call.
+// isVarArgs - True if the call uses a varargs calling convention.
+// callHasFloatRegArgs - Set to true if this PUTARG_REG uses an FP register.
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitPutArgReg(
+ GenTreeUnOp* node, regNumber argReg, TreeNodeInfo& info, bool isVarArgs, bool* callHasFloatRegArgs)
+{
+ assert(node != nullptr);
+ assert(node->OperIsPutArgReg());
+ assert(argReg != REG_NA);
+
+ // Each register argument corresponds to one source.
+ info.srcCount++;
+
+ // Set the register requirements for the node.
+ const regMaskTP argMask = genRegMask(argReg);
+ node->gtLsraInfo.setDstCandidates(m_lsra, argMask);
+ node->gtLsraInfo.setSrcCandidates(m_lsra, argMask);
+
+ // To avoid redundant moves, have the argument operand computed in the
+ // register in which the argument is passed to the call.
+ node->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(m_lsra, m_lsra->getUseCandidates(node));
+
+#if FEATURE_VARARG
+ *callHasFloatRegArgs |= varTypeIsFloating(node->TypeGet());
+
+ // In the case of a varargs call, the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the integer and floating point registers.
+ // Since the integer register is not associated with this arg node, we will reserve it as
+ // an internal register so that it is not used during the evaluation of the call node
+ // (e.g. for the target).
+ if (isVarArgs && varTypeIsFloating(node))
+ {
+ regNumber targetReg = comp->getCallArgIntRegister(argReg);
+ info.setInternalIntCount(info.internalIntCount + 1);
+ info.addInternalCandidates(m_lsra, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
+}
+
+//------------------------------------------------------------------------
// TreeNodeInfoInitCall: Set the NodeInfo for a call.
//
// Arguments:
@@ -1337,15 +1386,23 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
}
}
-#if FEATURE_VARARG
bool callHasFloatRegArgs = false;
-#endif // !FEATURE_VARARG
+ bool isVarArgs = call->IsVarargs();
// First, count reg args
for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
{
assert(list->OperIsList());
+ // By this point, lowering has ensured that all call arguments are one of the following:
+ // - an arg setup store
+ // - an arg placeholder
+ // - a nop
+ // - a copy blk
+ // - a field list
+ // - a put arg
+ //
+ // Note that this property is statically checked by Lowering::CheckBlock.
GenTreePtr argNode = list->Current();
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
@@ -1372,166 +1429,30 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
argNode->gtLsraInfo.srcCount = 0;
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
- continue;
- }
-
- regNumber argReg = REG_NA;
- regMaskTP argMask = RBM_NONE;
- short regCount = 0;
- bool isOnStack = true;
- if (curArgTabEntry->regNum != REG_STK)
- {
- isOnStack = false;
- var_types argType = argNode->TypeGet();
-
-#if FEATURE_VARARG
- callHasFloatRegArgs |= varTypeIsFloating(argType);
-#endif // !FEATURE_VARARG
-
- argReg = curArgTabEntry->regNum;
- regCount = 1;
-
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
- argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
+ continue;
}
- // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
- // Use the curArgTabEntry's isStruct to get whether the param is a struct.
- if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
- {
- unsigned originalSize = 0;
- LclVarDsc* varDsc = nullptr;
- if (argNode->gtOper == GT_LCL_VAR)
- {
- varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- }
- else if (argNode->gtOper == GT_MKREFANY)
- {
- originalSize = 2 * TARGET_POINTER_SIZE;
- }
- else if (argNode->gtOper == GT_OBJ)
- {
- noway_assert(!"GT_OBJ not supported for amd64");
- }
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- else if (argNode->gtOper == GT_PUTARG_REG)
- {
- originalSize = genTypeSize(argNode->gtType);
- }
- else if (argNode->gtOper == GT_FIELD_LIST)
- {
- originalSize = 0;
-
- // There could be up to 2 PUTARG_REGs in the list
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
- unsigned iterationNum = 0;
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
-
- if (iterationNum == 0)
- {
- varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- assert(originalSize != 0);
- }
- else
- {
- // Need an extra source for every node, but the first in the list.
- info->srcCount++;
-
- // Get the mask for the second putarg_reg
- argMask = genRegMask(curArgTabEntry->otherRegNum);
- }
-
- putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
- iterationNum++;
- }
-
- assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- else
- {
- noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- unsigned remainingSlots = slots;
-
- if (!isOnStack)
- {
- remainingSlots = slots - 1;
-
- regNumber reg = (regNumber)(argReg + 1);
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
- {
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
- }
- }
+ if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ assert(varTypeIsStruct(argNode) || curArgTabEntry->isStruct);
- short internalIntCount = 0;
- if (remainingSlots > 0)
+ unsigned eightbyte = 0;
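+ // Under the SysV AMD64 ABI a struct argument is passed in at most two register eightbytes:
+ // the first uses curArgTabEntry->regNum, the second uses curArgTabEntry->otherRegNum.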
+ for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
{
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- // This TYP_STRUCT argument is also passed in the outgoing argument area
- // We need a register to address the TYP_STRUCT
- internalIntCount = 1;
-#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // And we may need 2
- internalIntCount = 2;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- argNode->gtLsraInfo.internalIntCount = internalIntCount;
+ const regNumber argReg = eightbyte == 0 ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
+ TreeNodeInfoInitPutArgReg(entry->Current()->AsUnOp(), argReg, *info, isVarArgs, &callHasFloatRegArgs);
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ eightbyte++;
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
{
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(l, genRegMask(targetReg));
+ TreeNodeInfoInitPutArgReg(argNode->AsUnOp(), curArgTabEntry->regNum, *info, isVarArgs,
+ &callHasFloatRegArgs);
}
-#endif // FEATURE_VARARG
}
// Now, count stack args
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index dabca57710..92d5e0967e 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -92,7 +92,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
tree->gtCall.gtEntryPoint.addr = nullptr;
#endif
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_)) && !defined(LEGACY_BACKEND)
if (varTypeIsLong(tree))
{
GenTreeCall* callNode = tree->AsCall();
@@ -101,7 +101,7 @@ GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeA
retTypeDesc->InitializeLongReturnType(this);
callNode->ClearOtherRegs();
}
-#endif
+#endif // _TARGET_XXX_
/* Perform the morphing */
@@ -850,17 +850,22 @@ void fgArgTabEntry::Dump()
}
#endif
-fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs)
{
- compiler = comp;
- callTree = call;
- assert(call->IsCall());
+ compiler = comp;
+ callTree = call;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = 0;
#if defined(UNIX_X86_ABI)
- padStkAlign = 0;
+ alignmentDone = false;
+ stkSizeBytes = 0;
+ padStkAlign = 0;
#endif
+#if FEATURE_FIXED_OUT_ARGS
+ outArgSize = 0;
+#endif
+
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
@@ -889,22 +894,22 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
* in the argTable contains pointers that must point to the
* new arguments and not the old arguments.
*/
-fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall)
{
- assert(oldCall->IsCall());
- assert(newCall->IsCall());
-
fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
- compiler = oldArgInfo->compiler;
- ;
- callTree = newCall;
- assert(newCall->IsCall());
+ compiler = oldArgInfo->compiler;
+ callTree = newCall;
argCount = 0; // filled in arg count, starts at zero
nextSlotNum = INIT_ARG_STACK_SLOT;
stkLevel = oldArgInfo->stkLevel;
#if defined(UNIX_X86_ABI)
- padStkAlign = oldArgInfo->padStkAlign;
+ alignmentDone = oldArgInfo->alignmentDone;
+ stkSizeBytes = oldArgInfo->stkSizeBytes;
+ padStkAlign = oldArgInfo->padStkAlign;
+#endif
+#if FEATURE_FIXED_OUT_ARGS
+ outArgSize = oldArgInfo->outArgSize;
#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
@@ -924,22 +929,22 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
// so we can iterate over these argument lists more uniformly.
// Need to provide a temporary non-null first arguments to these constructors: if we use them, we'll replace them
GenTreeArgList* newArgs;
- GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+ GenTreeArgList newArgObjp(newCall, newCall->gtCallArgs);
GenTreeArgList* oldArgs;
- GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+ GenTreeArgList oldArgObjp(oldCall, oldCall->gtCallArgs);
- if (newCall->gtCall.gtCallObjp == nullptr)
+ if (newCall->gtCallObjp == nullptr)
{
- assert(oldCall->gtCall.gtCallObjp == nullptr);
- newArgs = newCall->gtCall.gtCallArgs;
- oldArgs = oldCall->gtCall.gtCallArgs;
+ assert(oldCall->gtCallObjp == nullptr);
+ newArgs = newCall->gtCallArgs;
+ oldArgs = oldCall->gtCallArgs;
}
else
{
- assert(oldCall->gtCall.gtCallObjp != nullptr);
- newArgObjp.Current() = newCall->gtCall.gtCallArgs;
+ assert(oldCall->gtCallObjp != nullptr);
+ newArgObjp.Current() = newCall->gtCallArgs;
newArgs = &newArgObjp;
- oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+ oldArgObjp.Current() = oldCall->gtCallObjp;
oldArgs = &oldArgObjp;
}
@@ -1023,8 +1028,8 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
if (scanRegArgs)
{
- newArgs = newCall->gtCall.gtCallLateArgs;
- oldArgs = oldCall->gtCall.gtCallLateArgs;
+ newArgs = newCall->gtCallLateArgs;
+ oldArgs = oldCall->gtCallLateArgs;
while (newArgs)
{
@@ -1085,19 +1090,16 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
- curArgTabEntry->padStkAlign = 0;
-#endif
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1163,19 +1165,16 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
-#if defined(UNIX_X86_ABI)
- curArgTabEntry->padStkAlign = 0;
-#endif
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1701,52 +1700,6 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}
-#if defined(UNIX_X86_ABI)
-// Get the stack alignment value for a Call holding this object
-//
-// NOTE: This function will calculate number of padding slots, to align the
-// stack before pushing arguments to the stack. Padding value is stored in
-// the first argument in fgArgTabEntry structure padStkAlign member so that
-// code (sub esp, n) can be emitted before generating argument push in
-// fgArgTabEntry node. As of result stack will be aligned right before
-// making a "Call". After the Call, stack is re-adjusted to the value it
-// was with fgArgInfo->padStkAlign value as we cann't use the one in fgArgTabEntry.
-//
-void fgArgInfo::ArgsAlignPadding()
-{
- // To get the padding amount, sum up all the slots and get the remainder for padding
- unsigned curInx;
- unsigned numSlots = 0;
- fgArgTabEntryPtr firstArgTabEntry = nullptr;
-
- for (curInx = 0; curInx < argCount; curInx++)
- {
- fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
- if (curArgTabEntry->numSlots > 0)
- {
- // The argument may be REG_STK or constant or register that goes to stack
- assert(nextSlotNum >= curArgTabEntry->slotNum);
-
- numSlots += curArgTabEntry->numSlots;
- if (firstArgTabEntry == nullptr)
- {
- // First argument will be used to hold the padding amount
- firstArgTabEntry = curArgTabEntry;
- }
- }
- }
-
- if (firstArgTabEntry != nullptr)
- {
- const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
- // Set stack align pad for the first argument
- firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
- // Set also for fgArgInfo that will be used to reset stack pointer after the Call
- this->padStkAlign = firstArgTabEntry->padStkAlign;
- }
-}
-#endif // UNIX_X86_ABI
-
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
@@ -2665,10 +2618,8 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE
#pragma warning(push)
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
#endif
-GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
{
- GenTreeCall* call = callNode->AsCall();
-
GenTreePtr args;
GenTreePtr argx;
@@ -2838,9 +2789,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
// so we record the stack depth on the first morph call when reMorphing
// was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
//
- unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
if (call->gtCallLateArgs != nullptr)
{
+ unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
fgPtrArgCntCur += callStkLevel;
call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
flagsSummary |= call->gtCallLateArgs->gtFlags;
@@ -2874,9 +2825,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
CLANG_FORMAT_COMMENT_ANCHOR;
#if !defined(LEGACY_BACKEND)
-#if defined(_TARGET_X86_)
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
- // correctly here.
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ // The x86 and arm32 CORINFO_HELP_INIT_PINVOKE_FRAME helpers have a custom calling convention.
+ // Set the argument registers correctly here.
if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
{
GenTreeArgList* args = call->gtCallArgs;
@@ -2884,6 +2835,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
assert(arg1 != nullptr);
nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
}
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_)
// The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
// hi part to be in EDX. This sets the argument registers up correctly.
else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
@@ -4286,10 +4239,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
{
call->fgArgInfo->ArgsComplete();
-#if defined(UNIX_X86_ABI)
- call->fgArgInfo->ArgsAlignPadding();
-#endif // UNIX_X86_ABI
-
#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
@@ -4327,19 +4276,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (fgPtrArgCntMax < fgPtrArgCntCur)
{
+ JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
fgPtrArgCntMax = fgPtrArgCntCur;
}
+ assert(fgPtrArgCntCur >= genPtrArgCntSav);
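+ // (fgPtrArgCntCur - genPtrArgCntSav) is the number of pointer-sized stack slots pushed for this
+ // call's arguments; record its size in bytes on the call's fgArgInfo.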
+ call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE);
+
/* The call will pop all the arguments we pushed */
fgPtrArgCntCur = genPtrArgCntSav;
#if FEATURE_FIXED_OUT_ARGS
- // Update the outgoing argument size.
- // If the call is a fast tail call, it will setup its arguments in incoming arg
- // area instead of the out-going arg area. Therefore, don't consider fast tail
- // calls to update lvaOutgoingArgSpaceSize.
+ // Record the outgoing argument size. If the call is a fast tail
+ // call, it will set up its arguments in the incoming arg area instead
+ // of the outgoing arg area, so we don't need to track the
+ // outgoing arg size.
if (!call->IsFastTailCall())
{
unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
@@ -4359,26 +4312,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
}
#endif // UNIX_AMD64_ABI
- // Check if we need to increase the size of our Outgoing Arg Space
- if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
- {
- lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
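+ // Record this call's outgoing arg space requirement (at least MIN_ARG_AREA_FOR_CALL) on its
+ // fgArgInfo rather than bumping lvaOutgoingArgSpaceSize directly here.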
+ const unsigned outgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+ call->fgArgInfo->SetOutArgSize(max(outgoingArgSpaceSize, MIN_ARG_AREA_FOR_CALL));
- // If a function has localloc, we will need to move the outgoing arg space when the
- // localloc happens. When we do this, we need to maintain stack alignment. To avoid
- // leaving alignment-related holes when doing this move, make sure the outgoing
- // argument space size is a multiple of the stack alignment by aligning up to the next
- // stack alignment boundary.
- if (compLocallocUsed)
- {
- lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
- }
- }
#ifdef DEBUG
if (verbose)
{
- printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
- preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+ printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, outgoingArgSpaceSize=%d\n", argSlots,
+ preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), outgoingArgSpaceSize);
}
#endif
}
@@ -5047,7 +4988,7 @@ GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr f
// replace the existing LDOBJ(ADDR(LCLVAR))
// with a FIELD_LIST(LCLFLD-LO, FIELD_LIST(LCLFLD-HI, nullptr) ...)
//
- unsigned offset = 0;
+ unsigned offset = baseOffset;
GenTreeFieldList* listEntry = nullptr;
for (unsigned inx = 0; inx < elemCount; inx++)
{
@@ -6163,6 +6104,14 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
return newTree;
}
}
+ else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
+ {
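+ // We are taking a field through the address of a SIMD-typed local; mark the local
+ // as do-not-enregister so it keeps a stack home for the field access.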
+ GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr();
+ if (lcl != nullptr)
+ {
+ lvaSetVarDoNotEnregister(lcl->gtLclNum DEBUGARG(DNER_LocalField));
+ }
+ }
#endif
/* Is this an instance data member? */
@@ -6735,8 +6684,10 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
printTreeID(fgMorphStmt);
printf(" in BB%02u:\n", compCurBB->bbNum);
gtDispTree(fgMorphStmt);
-
- // printf("startVars=%d.\n", startVars);
+ if (call->IsImplicitTailCall())
+ {
+ printf("Note: candidate is implicit tail call\n");
+ }
}
#endif
@@ -7865,6 +7816,9 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// Either a call stmt or
// GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
// var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
+ // GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
+ // In the above,
+ // GT_CASTS may be nested.
genTreeOps stmtOper = stmtExpr->gtOper;
if (stmtOper == GT_CALL)
{
@@ -7872,24 +7826,31 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
}
else
{
- noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG);
+ noway_assert(stmtOper == GT_RETURN || stmtOper == GT_ASG || stmtOper == GT_COMMA);
GenTreePtr treeWithCall;
if (stmtOper == GT_RETURN)
{
treeWithCall = stmtExpr->gtGetOp1();
}
- else
+ else if (stmtOper == GT_COMMA)
{
- treeWithCall = stmtExpr->gtGetOp2();
+ // The second operand must be a nop.
+ noway_assert(stmtExpr->gtGetOp2()->IsNothingNode());
+ treeWithCall = stmtExpr->gtGetOp1();
}
- if (treeWithCall->gtOper == GT_CAST)
+ else
{
- noway_assert(treeWithCall->gtGetOp1() == call && !treeWithCall->gtOverflow());
+ treeWithCall = stmtExpr->gtGetOp2();
}
- else
+
+ // Peel off casts
+ while (treeWithCall->gtOper == GT_CAST)
{
- noway_assert(treeWithCall == call);
+ noway_assert(!treeWithCall->gtOverflow());
+ treeWithCall = treeWithCall->gtGetOp1();
}
+
+ noway_assert(treeWithCall == call);
}
#endif
@@ -7909,10 +7870,11 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
// 2) tail.call, nop*, pop, nop*, ret
// 3) var=tail.call, nop*, ret(var)
// 4) var=tail.call, nop*, pop, ret
+ // 5) comma(tail.call, nop), nop*, ret
//
// See impIsTailCallILPattern() for details on tail call IL patterns
// that are supported.
- if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+ if (stmtExpr->gtOper != GT_RETURN)
{
// First delete all GT_NOPs after the call
GenTreeStmt* morphStmtToRemove = nullptr;
@@ -7940,7 +7902,16 @@ GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
GenTreeStmt* popStmt = nextMorphStmt;
nextMorphStmt = nextMorphStmt->gtNextStmt;
- noway_assert((popStmt->gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ // Side effect flags on a GT_COMMA may be overly pessimistic, so examine
+ // the constituent nodes.
+ GenTreePtr popExpr = popStmt->gtStmtExpr;
+ bool isSideEffectFree = (popExpr->gtFlags & GTF_ALL_EFFECT) == 0;
+ if (!isSideEffectFree && (popExpr->OperGet() == GT_COMMA))
+ {
+ isSideEffectFree = ((popExpr->gtGetOp1()->gtFlags & GTF_ALL_EFFECT) == 0) &&
+ ((popExpr->gtGetOp2()->gtFlags & GTF_ALL_EFFECT) == 0);
+ }
+ noway_assert(isSideEffectFree);
fgRemoveStmt(compCurBB, popStmt);
}
@@ -9658,6 +9629,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
assert(dest->gtOper == GT_LCL_FLD);
blockWidth = genTypeSize(dest->TypeGet());
destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ destFldSeq = dest->AsLclFld()->gtFieldSeq;
}
}
else
@@ -9779,12 +9751,13 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// Check to see if we are required to do a copy block because the struct contains holes
// and either the src or dest is externally visible
//
- bool requiresCopyBlock = false;
- bool srcSingleLclVarAsg = false;
+ bool requiresCopyBlock = false;
+ bool srcSingleLclVarAsg = false;
+ bool destSingleLclVarAsg = false;
- if ((destLclVar != nullptr) && (srcLclVar == destLclVar))
+ if ((destLclVar != nullptr) && (srcLclVar == destLclVar) && (destFldSeq == srcFldSeq))
{
- // Beyond perf reasons, it is not prudent to have a copy of a struct to itself.
+ // Self-assign; no effect.
GenTree* nop = gtNewNothingNode();
INDEBUG(nop->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return nop;
@@ -9896,6 +9869,30 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
}
}
}
+ else
+ {
+ assert(srcDoFldAsg);
+ // Check for the symmetric case (which happens for the _pointer field of promoted spans):
+ //
+ // [000240] -----+------ /--* lclVar struct(P) V18 tmp9
+ // /--* byref V18._value (offs=0x00) -> V30 tmp21
+ // [000245] -A------R--- * = struct (copy)
+ // [000244] -----+------ \--* obj(8) struct
+ // [000243] -----+------ \--* addr byref
+ // [000242] D----+-N---- \--* lclVar byref V28 tmp19
+ //
+ if (blockWidthIsConst && (srcLclVar->lvFieldCnt == 1) && (destLclVar != nullptr) &&
+ (blockWidth == genTypeSize(destLclVar->TypeGet())))
+ {
+ // Check for type agreement
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart;
+ var_types srcType = lvaTable[fieldLclNum].TypeGet();
+ if (destLclVar->TypeGet() == srcType)
+ {
+ destSingleLclVarAsg = true;
+ }
+ }
+ }
}
// If we require a copy block the set both of the field assign bools to false
@@ -9912,7 +9909,7 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
// when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
// or the struct is not promoted
//
- if (!destDoFldAsg && (destLclVar != nullptr))
+ if (!destDoFldAsg && (destLclVar != nullptr) && !destSingleLclVarAsg)
{
if (!destLclVar->lvRegStruct)
{
@@ -10166,45 +10163,56 @@ GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
noway_assert(srcLclNum != BAD_VAR_NUM);
unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
- if (addrSpill)
+ if (destSingleLclVarAsg)
{
- assert(addrSpillTemp != BAD_VAR_NUM);
- dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ noway_assert(fieldCnt == 1);
+ noway_assert(destLclVar != nullptr);
+ noway_assert(addrSpill == nullptr);
+
+ dest = gtNewLclvNode(destLclNum, destLclVar->TypeGet());
}
else
{
- dest = gtCloneExpr(destAddr);
- noway_assert(dest != nullptr);
-
- // Is the address of a local?
- GenTreeLclVarCommon* lclVarTree = nullptr;
- bool isEntire = false;
- bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
- if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
{
- lclVarTree->gtFlags |= GTF_VAR_DEF;
- if (!isEntire)
+ dest = gtCloneExpr(destAddr);
+ noway_assert(dest != nullptr);
+
+ // Is the address of a local?
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
+ if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
{
- lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (!isEntire)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
}
}
- }
- GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
- // Have to set the field sequence -- which means we need the field handle.
- CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
- CORINFO_FIELD_HANDLE fieldHnd =
- info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
- curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
- fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+ GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+ // Have to set the field sequence -- which means we need the field handle.
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
- dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+ dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
- dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+ dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
- // !!! The destination could be on stack. !!!
- // This flag will let us choose the correct write barrier.
- dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
}
if (srcDoFldAsg)
@@ -10849,7 +10857,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
op1->gtFlags &= ~GTF_ALL_EFFECT;
op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
- op1->gtFlags |= GTF_DONT_CSE;
}
if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
@@ -10857,9 +10864,11 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
op2->gtFlags &= ~GTF_ALL_EFFECT;
op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
- op2->gtFlags |= GTF_DONT_CSE;
}
+ op1->gtFlags |= GTF_DONT_CSE;
+ op2->gtFlags |= GTF_DONT_CSE;
+
tree->gtFlags &= ~GTF_ALL_EFFECT;
tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
@@ -11178,11 +11187,13 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
GenTreePtr pGetType;
#ifdef LEGACY_BACKEND
- bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
- bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+ bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall());
+ bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall());
#else
- bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
- bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+ bool bOp1ClassFromHandle =
+ op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1->AsCall()) : false;
+ bool bOp2ClassFromHandle =
+ op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2->AsCall()) : false;
#endif
// Optimize typeof(...) == typeof(...)
@@ -12244,6 +12255,23 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
}
}
+ else // we have an unsigned comparison
+ {
+ if (op2->IsIntegralConst(0))
+ {
+ if ((oper == GT_GT) || (oper == GT_LE))
+ {
+ // IL doesn't have a cne instruction, so compilers use cgt.un instead. The JIT
+ // recognizes certain patterns that involve GT_NE (e.g. (x & 4) != 0) and fails
+ // if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
+ // and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
+ // occurs as a result of branch inversion.
+ oper = (oper == GT_LE) ? GT_EQ : GT_NE;
+ tree->SetOper(oper, GenTree::PRESERVE_VN);
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+ }
COMPARE:
@@ -14157,13 +14185,13 @@ GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
//
// OR ROL
// / \ / \
- // LSH RSZ -> x y
+ // LSH RSZ -> x y
// / \ / \
- // x AND x AND
+ // x AND x AND
// / \ / \
- // y 31 ADD 31
+ // y 31 ADD 31
// / \
- // NEG 32
+ // NEG 32
// |
// y
// The patterns recognized:
@@ -14534,7 +14562,10 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
tree = fgMorphIntoHelperCall(tree, helper, args);
if (fgPtrArgCntMax < fgPtrArgCntCur)
+ {
+ JITDUMP("Upping fgPtrArgCntMax from %d to %d\n", fgPtrArgCntMax, fgPtrArgCntCur);
fgPtrArgCntMax = fgPtrArgCntCur;
+ }
fgPtrArgCntCur -= argc;
return tree;
@@ -15090,13 +15121,13 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
// else if bTaken has valid profile weight and block does not we try to adjust block's weight
// We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
//
- if (block->bbFlags & BBF_PROF_WEIGHT)
+ if (block->hasProfileWeight())
{
// The edge weights for (block -> bTaken) are 100% of block's weight
edgeTaken->flEdgeWeightMin = block->bbWeight;
edgeTaken->flEdgeWeightMax = block->bbWeight;
- if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+ if (!bTaken->hasProfileWeight())
{
if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
{
@@ -15106,7 +15137,7 @@ bool Compiler::fgFoldConditional(BasicBlock* block)
}
}
}
- else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+ else if (bTaken->hasProfileWeight())
{
if (bTaken->countOfInEdges() == 1)
{
@@ -16171,7 +16202,9 @@ void Compiler::fgSetOptions()
// to use a frame pointer because of EH. But until all the code uses
// the same test, leave info.compXcptnsCount here.
if (info.compXcptnsCount > 0)
+ {
codeGen->setFramePointerRequiredEH(true);
+ }
#else // !_TARGET_X86_
@@ -16182,6 +16215,15 @@ void Compiler::fgSetOptions()
#endif // _TARGET_X86_
+#ifdef UNIX_X86_ABI
+ if (info.compXcptnsCount > 0)
+ {
+ assert(!codeGen->isGCTypeFixed());
+ // Enforce fully interruptible codegen for funclet unwinding
+ genInterruptible = true;
+ }
+#endif // UNIX_X86_ABI
+
fgCheckArgCnt();
if (info.compCallUnmanaged)
@@ -16250,6 +16292,15 @@ GenTreePtr Compiler::fgInitThisClass()
CORINFO_RESOLVED_TOKEN resolvedToken;
memset(&resolvedToken, 0, sizeof(resolvedToken));
+ // We are in a shared method body, but maybe we don't need a runtime lookup after all.
+ // This covers the case of a generic method on a non-generic type.
+ if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
+ {
+ resolvedToken.hClass = info.compClassHnd;
+ return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
+ }
+
+ // We need a runtime lookup.
GenTreePtr ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
// CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
@@ -16263,7 +16314,7 @@ GenTreePtr Compiler::fgInitThisClass()
// Collectible types requires that for shared generic code, if we use the generic context paramter
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
// context parameter is this that we don't need the eager reporting logic.)
- lvaGenericsContextUsed = true;
+ lvaGenericsContextUseCount++;
switch (kind.runtimeLookupKind)
{
@@ -16952,6 +17003,10 @@ void Compiler::fgMorph()
EndPhase(PHASE_EMPTY_FINALLY);
+ fgMergeFinallyChains();
+
+ EndPhase(PHASE_MERGE_FINALLY_CHAINS);
+
fgCloneFinally();
EndPhase(PHASE_CLONE_FINALLY);
@@ -17072,13 +17127,11 @@ void Compiler::fgPromoteStructs()
#endif // DEBUG
// The lvaTable might grow as we grab temps. Make a local copy here.
-
unsigned startLvaCount = lvaCount;
//
// Loop through the original lvaTable. Looking for struct locals to be promoted.
//
-
lvaStructPromotionInfo structPromotionInfo;
bool tooManyLocals = false;
@@ -17088,13 +17141,14 @@ void Compiler::fgPromoteStructs()
bool promotedVar = false;
LclVarDsc* varDsc = &lvaTable[lclNum];
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
{
- // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
- // its fields. Instead, we will attempt to enregister the entire struct.
varDsc->lvRegStruct = true;
}
- else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit.
+ // Don't promote if we have reached the tracking limit.
+ else if (lvaHaveManyLocals())
{
// Print the message first time when we detected this condition
if (!tooManyLocals)
@@ -17103,159 +17157,56 @@ void Compiler::fgPromoteStructs()
}
tooManyLocals = true;
}
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsMultiRegArg)
- {
- JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
- }
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsMultiRegRet)
- {
- JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
- }
else if (varTypeIsStruct(varDsc))
{
- lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
- bool canPromote = structPromotionInfo.canPromote;
-
- // We start off with shouldPromote same as canPromote.
- // Based on further profitablity checks done below, shouldPromote
- // could be set to false.
- bool shouldPromote = canPromote;
-
- if (canPromote)
- {
- // We *can* promote; *should* we promote?
- // We should only do so if promotion has potential savings. One source of savings
- // is if a field of the struct is accessed, since this access will be turned into
- // an access of the corresponding promoted field variable. Even if there are no
- // field accesses, but only block-level operations on the whole struct, if the struct
- // has only one or two fields, then doing those block operations field-wise is probably faster
- // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
- // Struct promotion also provides the following benefits: reduce stack frame size,
- // reduce the need for zero init of stack frame and fine grained constant/copy prop.
- // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
- // So if no fields are accessed independently, and there are four or more fields,
- // then do not promote.
- //
- // TODO: Ideally we would want to consider the impact of whether the struct is
- // passed as a parameter or assigned the return value of a call. Because once promoted,
- // struct copying is done by field by field assignment instead of a more efficient
- // rep.stos or xmm reg based copy.
- if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
- {
- JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
- structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
- shouldPromote = false;
- }
-#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
- // TODO-PERF - Only do this when the LclVar is used in an argument context
- // TODO-ARM64 - HFA support should also eliminate the need for this.
- // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
- //
- // For now we currently don't promote structs with a single float field
- // Promoting it can cause us to shuffle it back and forth between the int and
- // the float regs when it is used as a argument, which is very expensive for XARCH
- //
- else if ((structPromotionInfo.fieldCnt == 1) &&
- varTypeIsFloating(structPromotionInfo.fields[0].fldType))
- {
- JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
- "single float field.\n",
- lclNum, structPromotionInfo.fieldCnt);
- shouldPromote = false;
- }
-#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+ bool shouldPromote;
-#if !FEATURE_MULTIREG_STRUCT_PROMOTE
-#if defined(_TARGET_ARM64_)
- //
- // For now we currently don't promote structs that are passed in registers
- //
- else if (lvaIsMultiregStruct(varDsc))
- {
- JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
- lvaLclExactSize(lclNum));
- shouldPromote = false;
- }
-#endif // _TARGET_ARM64_
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
- else if (varDsc->lvIsParam)
- {
-#if FEATURE_MULTIREG_STRUCT_PROMOTE
- if (lvaIsMultiregStruct(
- varDsc) && // Is this a variable holding a value that is passed in multiple registers?
- (structPromotionInfo.fieldCnt != 2)) // Does it have exactly two fields
- {
- JITDUMP(
- "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
- lclNum);
- shouldPromote = false;
- }
- else
-#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-
- // TODO-PERF - Implement struct promotion for incoming multireg structs
- // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
-
- if (structPromotionInfo.fieldCnt != 1)
- {
- JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
- "%d.\n",
- lclNum, structPromotionInfo.fieldCnt);
- shouldPromote = false;
- }
- }
-
- //
- // If the lvRefCnt is zero and we have a struct promoted parameter we can end up with an extra store of
- // the the incoming register into the stack frame slot.
- // In that case, we would like to avoid promortion.
- // However we haven't yet computed the lvRefCnt values so we can't do that.
- //
- CLANG_FORMAT_COMMENT_ANCHOR;
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ if (structPromotionInfo.canPromote)
+ {
+ shouldPromote = lvaShouldPromoteStructVar(lclNum, &structPromotionInfo);
+ }
+ else
+ {
+ shouldPromote = false;
+ }
#if 0
- // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
- // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
- static int structPromoVarNum = 0;
- structPromoVarNum++;
- if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
#endif // 0
- if (shouldPromote)
- {
- assert(canPromote);
-
- // Promote the this struct local var.
- lvaPromoteStructVar(lclNum, &structPromotionInfo);
- promotedVar = true;
+ if (shouldPromote)
+ {
+ // Promote the this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+ promotedVar = true;
#ifdef _TARGET_ARM_
- if (structPromotionInfo.requiresScratchVar)
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
{
- // Ensure that the scratch variable is allocated, in case we
- // pass a promoted struct as an argument.
- if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
- {
- lvaPromotedStructAssemblyScratchVar =
- lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
- lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
- }
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
}
-#endif // _TARGET_ARM_
}
+#endif // _TARGET_ARM_
}
}
-#ifdef FEATURE_SIMD
- if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ if (!promotedVar && varDsc->lvIsSIMDType() && !varDsc->lvFieldAccessed)
{
// Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
// we will treat it as a reg struct.
varDsc->lvRegStruct = true;
}
-#endif // FEATURE_SIMD
}
#ifdef DEBUG
@@ -17298,10 +17249,30 @@ Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData*
tree->gtFlags &= ~GTF_GLOB_REF;
GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ if (parent->gtOper == GT_ASG)
{
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
+ if (parent->gtOp.gtOp1 == tree)
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Promotion of a struct containing struct fields, where such a field
+ // is itself a struct with a single pointer-sized scalar field: in
+ // this case struct promotion uses the type of the underlying
+ // scalar field as the type of the struct field instead of promoting
+ // recursively. This can leave a block assignment whose RHS has been
+ // replaced with a scalar-typed node. Mark the RHS value as
+ // DONT_CSE so that assertion prop will not const-propagate it,
+ // because a constant RHS of a block assignment would be
+ // incorrectly interpreted as an init-block.
+ //
+ // TODO - This can also be avoided if we implement recursive struct
+ // promotion.
+ if (varTypeIsStruct(parent) && parent->gtOp.gtOp2 == tree && !varTypeIsStruct(tree))
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
}
#ifdef DEBUG
if (verbose)
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index 5ee6d84920..41aad403d9 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -321,8 +321,8 @@ Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData
//
void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
{
- pMaskData->CSE_defMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
- pMaskData->CSE_useMask = BitVecOps::MakeCopy(cseTraits, cseEmpty);
+ pMaskData->CSE_defMask = BitVecOps::MakeEmpty(cseTraits);
+ pMaskData->CSE_useMask = BitVecOps::MakeEmpty(cseTraits);
fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
}
@@ -498,10 +498,7 @@ void Compiler::optValnumCSE_Init()
// Init traits and full/empty bitvectors. This will be used to track the
// individual cse indexes.
cseTraits = new (getAllocator()) BitVecTraits(EXPSET_SZ, this);
- cseFull = BitVecOps::UninitVal();
- cseEmpty = BitVecOps::UninitVal();
- BitVecOps::AssignNoCopy(cseTraits, cseFull, BitVecOps::MakeFull(cseTraits));
- BitVecOps::AssignNoCopy(cseTraits, cseEmpty, BitVecOps::MakeEmpty(cseTraits));
+ cseFull = BitVecOps::MakeFull(cseTraits);
/* Allocate and clear the hash bucket table */
@@ -509,6 +506,9 @@ void Compiler::optValnumCSE_Init()
optCSECandidateCount = 0;
optDoCSE = false; // Stays false until we find duplicate CSE tree
+
+ // optCseArrLenMap is unused in most functions, allocated only when used
+ optCseArrLenMap = nullptr;
}
/*****************************************************************************
@@ -700,8 +700,17 @@ unsigned Compiler::optValnumCSE_Locate()
noway_assert(stmt->gtOper == GT_STMT);
/* We walk the tree in the forwards direction (bottom up) */
+ bool stmtHasArrLenCandidate = false;
for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
{
+ if (tree->OperIsCompare() && stmtHasArrLenCandidate)
+ {
+ // Check if this compare is a function of (one of) the array
+ // length candidate(s); we may want to update its value number
+ // if the array length gets CSEd
+ optCseUpdateArrLenMap(tree);
+ }
+
if (!optIsCSEcandidate(tree))
{
continue;
@@ -730,6 +739,11 @@ unsigned Compiler::optValnumCSE_Locate()
{
noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
}
+
+ if (IS_CSE_INDEX(CSEindex) && (tree->OperGet() == GT_ARR_LENGTH))
+ {
+ stmtHasArrLenCandidate = true;
+ }
}
}
}
@@ -748,6 +762,102 @@ unsigned Compiler::optValnumCSE_Locate()
return 1;
}
+//------------------------------------------------------------------------
+// optCseUpdateArrLenMap: Check if this compare is a tractable function of
+// an array length that is a CSE candidate, and insert
+// an entry in the optCseArrLenMap if so. This facilitates
+// subsequently updating the compare's value number if
+// the array length gets CSEd.
+//
+// Arguments:
+// compare - The compare node to check
+
+void Compiler::optCseUpdateArrLenMap(GenTreePtr compare)
+{
+ assert(compare->OperIsCompare());
+
+ ValueNum compareVN = compare->gtVNPair.GetConservative();
+ VNFuncApp cmpVNFuncApp;
+
+ if (!vnStore->GetVNFunc(compareVN, &cmpVNFuncApp) ||
+ (cmpVNFuncApp.m_func != GetVNFuncForOper(compare->OperGet(), compare->IsUnsigned())))
+ {
+ // Value numbering inferred this compare as something other
+ // than its own operator; leave its value number alone.
+ return;
+ }
+
+ // Now look for an array length feeding the compare
+ ValueNumStore::ArrLenArithBoundInfo info;
+ GenTreePtr arrLenParent = nullptr;
+
+ if (vnStore->IsVNArrLenBound(compareVN))
+ {
+ // Simple compare of an array length against something else.
+
+ vnStore->GetArrLenBoundInfo(compareVN, &info);
+ arrLenParent = compare;
+ }
+ else if (vnStore->IsVNArrLenArithBound(compareVN))
+ {
+ // Compare of an array length +/- some offset to something else.
+
+ GenTreePtr op1 = compare->gtGetOp1();
+ GenTreePtr op2 = compare->gtGetOp2();
+
+ vnStore->GetArrLenArithBoundInfo(compareVN, &info);
+ if (GetVNFuncForOper(op1->OperGet(), op1->IsUnsigned()) == (VNFunc)info.arrOper)
+ {
+ // The arithmetic node is the array length's parent.
+ arrLenParent = op1;
+ }
+ else if (GetVNFuncForOper(op2->OperGet(), op2->IsUnsigned()) == (VNFunc)info.arrOper)
+ {
+ // The arithmetic node is the array length's parent.
+ arrLenParent = op2;
+ }
+ }
+
+ if (arrLenParent != nullptr)
+ {
+ GenTreePtr arrLen = nullptr;
+
+ // Find which child of arrLenParent is the array length. Abort if its
+ // conservative value number doesn't match the one from the compare VN.
+
+ GenTreePtr child1 = arrLenParent->gtGetOp1();
+ if ((child1->OperGet() == GT_ARR_LENGTH) && IS_CSE_INDEX(child1->gtCSEnum) &&
+ (info.vnArray == child1->AsArrLen()->ArrRef()->gtVNPair.GetConservative()))
+ {
+ arrLen = child1;
+ }
+ else
+ {
+ GenTreePtr child2 = arrLenParent->gtGetOp2();
+ if ((child2->OperGet() == GT_ARR_LENGTH) && IS_CSE_INDEX(child2->gtCSEnum) &&
+ (info.vnArray == child2->AsArrLen()->ArrRef()->gtVNPair.GetConservative()))
+ {
+ arrLen = child2;
+ }
+ }
+
+ if (arrLen != nullptr)
+ {
+ // Found an arrayLen feeding a compare that is a tractable function of it;
+ // record this in the map so we can update the compare VN if the array length
+ // node gets CSEd.
+
+ if (optCseArrLenMap == nullptr)
+ {
+ // Allocate map on first use.
+ optCseArrLenMap = new (getAllocator()) NodeToNodeMap(getAllocator());
+ }
+
+ optCseArrLenMap->Set(arrLen, compare);
+ }
+ }
+}
+
/*****************************************************************************
*
* Compute each blocks bbCseGen
@@ -782,7 +892,7 @@ void Compiler::optValnumCSE_InitDataFlow()
if (init_to_zero)
{
/* Initialize to {ZERO} prior to dataflow */
- block->bbCseIn = BitVecOps::MakeCopy(cseTraits, cseEmpty);
+ block->bbCseIn = BitVecOps::MakeEmpty(cseTraits);
}
else
{
@@ -793,7 +903,7 @@ void Compiler::optValnumCSE_InitDataFlow()
block->bbCseOut = BitVecOps::MakeCopy(cseTraits, cseFull);
/* Initialize to {ZERO} prior to locating the CSE candidates */
- block->bbCseGen = BitVecOps::MakeCopy(cseTraits, cseEmpty);
+ block->bbCseGen = BitVecOps::MakeEmpty(cseTraits);
}
// We walk the set of CSE candidates and set the bit corresponsing to the CSEindex
@@ -847,42 +957,31 @@ void Compiler::optValnumCSE_InitDataFlow()
*/
class CSE_DataFlow
{
-private:
- EXPSET_TP m_preMergeOut;
-
- Compiler* m_pCompiler;
+ BitVecTraits* m_pBitVecTraits;
+ EXPSET_TP m_preMergeOut;
public:
- CSE_DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+ CSE_DataFlow(Compiler* pCompiler) : m_pBitVecTraits(pCompiler->cseTraits), m_preMergeOut(BitVecOps::UninitVal())
{
}
- Compiler* getCompiler()
- {
- return m_pCompiler;
- }
-
// At the start of the merge function of the dataflow equations, initialize premerge state (to detect changes.)
void StartMerge(BasicBlock* block)
{
- m_preMergeOut = BitVecOps::MakeCopy(m_pCompiler->cseTraits, block->bbCseOut);
+ BitVecOps::Assign(m_pBitVecTraits, m_preMergeOut, block->bbCseOut);
}
// During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
{
- BitVecOps::IntersectionD(m_pCompiler->cseTraits, block->bbCseIn, predBlock->bbCseOut);
+ BitVecOps::IntersectionD(m_pBitVecTraits, block->bbCseIn, predBlock->bbCseOut);
}
// At the end of the merge store results of the dataflow equations, in a postmerge state.
bool EndMerge(BasicBlock* block)
{
- BitVecTraits* traits = m_pCompiler->cseTraits;
- EXPSET_TP mergeOut = BitVecOps::MakeCopy(traits, block->bbCseIn);
- BitVecOps::UnionD(traits, mergeOut, block->bbCseGen);
- BitVecOps::IntersectionD(traits, mergeOut, block->bbCseOut);
- BitVecOps::Assign(traits, block->bbCseOut, mergeOut);
- return (!BitVecOps::Equal(traits, mergeOut, m_preMergeOut));
+ BitVecOps::DataFlowD(m_pBitVecTraits, block->bbCseOut, block->bbCseGen, block->bbCseIn);
+ return !BitVecOps::Equal(m_pBitVecTraits, block->bbCseOut, m_preMergeOut);
}
};
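The rewritten EndMerge folds the old copy/UnionD/IntersectionD/Assign sequence into one BitVecOps::DataFlowD call; on the bit sets the update is bbCseOut &= (bbCseIn | bbCseGen), and the return value reports whether bbCseOut changed. A hedged single-word sketch of that update (the real sets may span many words; this only shows the algebra):

```cpp
#include <cassert>
#include <cstdint>

// One 64-bit word standing in for a BitVec.
struct BlockBits
{
    uint64_t cseIn;
    uint64_t cseGen;
    uint64_t cseOut;
};

// Equivalent of the old copy/UnionD/IntersectionD/Assign sequence, and of what
// DataFlowD is expected to do in one pass: out &= (in | gen).
// Returns true when out changed, which is what EndMerge reports to the solver.
bool EndMergeWord(BlockBits& b)
{
    uint64_t preMergeOut = b.cseOut;
    b.cseOut &= (b.cseIn | b.cseGen);
    return b.cseOut != preMergeOut;
}

int main()
{
    BlockBits b{/*cseIn*/ 0x6, /*cseGen*/ 0x1, /*cseOut*/ 0xF};
    bool changed = EndMergeWord(b);
    assert(changed && b.cseOut == 0x7); // (0x6 | 0x1) & 0xF
    return 0;
}
```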
@@ -948,6 +1047,8 @@ void Compiler::optValnumCSE_Availablity()
printf("Labeling the CSEs with Use/Def information\n");
}
#endif
+ EXPSET_TP available_cses = BitVecOps::MakeEmpty(cseTraits);
+
for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
{
GenTreePtr stmt;
@@ -957,7 +1058,7 @@ void Compiler::optValnumCSE_Availablity()
compCurBB = block;
- EXPSET_TP available_cses = BitVecOps::MakeCopy(cseTraits, block->bbCseIn);
+ BitVecOps::Assign(cseTraits, available_cses, block->bbCseIn);
optCSEweight = block->getBBWeight(this);
@@ -1103,6 +1204,18 @@ public:
continue;
}
+#if FEATURE_FIXED_OUT_ARGS
+ // Skip the OutgoingArgArea in computing frame size, since
+ // its size is not yet known and it doesn't affect local
+ // offsets from the frame pointer (though it may affect
+ // them from the stack pointer).
+ noway_assert(m_pCompiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ if (lclNum == m_pCompiler->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
// Some LclVars always have stack homes
@@ -1909,6 +2022,39 @@ public:
// use to fetch the same value with no reload, so we can safely propagate that
// conservative VN to this use. This can help range check elimination later on.
cse->gtVNPair.SetConservative(defConservativeVN);
+
+ GenTreePtr cmp;
+ if ((exp->OperGet() == GT_ARR_LENGTH) && (m_pCompiler->optCseArrLenMap != nullptr) &&
+ (m_pCompiler->optCseArrLenMap->Lookup(exp, &cmp)))
+ {
+ // Propagate the new value number to this compare node as well, since
+ // subsequent range check elimination will try to correlate it with
+ // the other appearances that are getting CSEd.
+
+ ValueNumStore* vnStore = m_pCompiler->vnStore;
+ ValueNum oldCmpVN = cmp->gtVNPair.GetConservative();
+ ValueNumStore::ArrLenArithBoundInfo info;
+ ValueNum newCmpArgVN;
+ if (vnStore->IsVNArrLenBound(oldCmpVN))
+ {
+ // Comparison is against the array length directly.
+
+ newCmpArgVN = defConservativeVN;
+ vnStore->GetArrLenBoundInfo(oldCmpVN, &info);
+ }
+ else
+ {
+ // Comparison is against the array length +/- some offset.
+
+ assert(vnStore->IsVNArrLenArithBound(oldCmpVN));
+ vnStore->GetArrLenArithBoundInfo(oldCmpVN, &info);
+ newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper,
+ info.arrOp, defConservativeVN);
+ }
+ ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper,
+ info.cmpOp, newCmpArgVN);
+ cmp->gtVNPair.SetConservative(newCmpVN);
+ }
}
#ifdef DEBUG
cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 92edf62890..c18ebc55d0 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -227,7 +227,7 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex
unsigned weight;
- if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
+ if (curBlk->hasProfileWeight())
{
// We have real profile weights, so we aren't going to change this block's weight
weight = curBlk->bbWeight;
@@ -370,7 +370,7 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
// Don't unmark blocks that are set to BB_MAX_WEIGHT
// Don't unmark blocks when we are using profile weights
//
- if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
+ if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight())
{
if (!fgDominate(curBlk, endBlk))
{
@@ -3527,8 +3527,7 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
{
// Only rely upon the profile weight when all three of these blocks
// have good profile weights
- if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
- (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
+ if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
{
allProfileWeightsAreValid = true;
@@ -4836,18 +4835,16 @@ void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
* Determine the kind of interference for the call.
*/
-/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
+/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call)
{
- assert(call->gtOper == GT_CALL);
-
// if not a helper, kills everything
- if (call->gtCall.gtCallType != CT_HELPER)
+ if (call->gtCallType != CT_HELPER)
{
return CALLINT_ALL;
}
// setfield and array address store kill all indirections
- switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
+ switch (eeGetHelperNum(call->gtCallMethHnd))
{
case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
@@ -5311,7 +5308,7 @@ Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* d
isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
assert(desc && desc->ivaSelf == desc);
- desc->ivaMaskCall = optCallInterf(tree);
+ desc->ivaMaskCall = optCallInterf(tree->AsCall());
}
return WALK_CONTINUE;
@@ -6555,9 +6552,8 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum)
}
else
{
- bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
- ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
- ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
+ bool allValidProfileWeights =
+ (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight());
if (allValidProfileWeights)
{
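The optimizer.cpp changes above replace the raw `(bbFlags & BBF_PROF_WEIGHT) != 0` tests with a BasicBlock::hasProfileWeight() accessor. A sketch of what such an accessor presumably wraps (the flag value and struct layout below are invented for the example; the real definitions live in block.h):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative flag value; the real BBF_PROF_WEIGHT constant is defined in block.h.
constexpr uint64_t BBF_PROF_WEIGHT = 0x00000800;

struct BasicBlock
{
    uint64_t bbFlags;
    unsigned bbWeight;

    // Wrapper the diff switches callers to, replacing raw flag tests.
    bool hasProfileWeight() const
    {
        return (bbFlags & BBF_PROF_WEIGHT) != 0;
    }
};

int main()
{
    BasicBlock blk{BBF_PROF_WEIGHT, 100};
    assert(blk.hasProfileWeight());
    blk.bbFlags = 0;
    assert(!blk.hasProfileWeight());
    return 0;
}
```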
diff --git a/src/jit/protojit/CMakeLists.txt b/src/jit/protojit/CMakeLists.txt
index 91c69e9a83..d27f30281a 100644
--- a/src/jit/protojit/CMakeLists.txt
+++ b/src/jit/protojit/CMakeLists.txt
@@ -13,6 +13,7 @@ endif(WIN32)
add_library_clr(protojit
SHARED
${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_SOURCES}
)
add_dependencies(protojit jit_exports)
diff --git a/src/jit/protononjit/.gitmirror b/src/jit/protononjit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/protononjit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/protononjit/CMakeLists.txt b/src/jit/protononjit/CMakeLists.txt
new file mode 100644
index 0000000000..e209e4cd36
--- /dev/null
+++ b/src/jit/protononjit/CMakeLists.txt
@@ -0,0 +1,84 @@
+project(protononjit)
+
+add_definitions(-DALT_JIT)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+remove_definitions(-DFEATURE_SIMD)
+remove_definitions(-DFEATURE_AVX_SUPPORT)
+
+if (CLR_CMAKE_PLATFORM_ARCH_I386)
+ remove_definitions(-D_TARGET_X86_=1)
+ add_definitions(-D_TARGET_ARM_)
+ set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM_SOURCES})
+elseif(CLR_CMAKE_PLATFORM_ARCH_AMD64)
+ remove_definitions(-D_TARGET_AMD64_=1)
+ add_definitions(-D_TARGET_ARM64_)
+ set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM64_SOURCES})
+else()
+ clr_unknown_arch()
+endif()
+
+if (NOT WIN32)
+ if (CLR_CMAKE_PLATFORM_ARCH_I386)
+ remove_definitions(-DUNIX_X86_ABI)
+ elseif(CLR_CMAKE_PLATFORM_ARCH_AMD64)
+ remove_definitions(-DUNIX_AMD64_ABI)
+ remove_definitions(-DFEATURE_UNIX_AMD64_STRUCT_PASSING)
+ else()
+ clr_unknown_arch()
+ endif()
+endif(NOT WIN32)
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=protononjit.dll)
+endif(WIN32)
+
+add_library_clr(protononjit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_ALTJIT_SOURCES}
+)
+
+add_dependencies(protononjit jit_exports)
+
+set_property(TARGET protononjit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET protononjit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(protononjit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(protononjit)
diff --git a/src/jit/protononjit/SOURCES b/src/jit/protononjit/SOURCES
new file mode 100644
index 0000000000..353c501873
--- /dev/null
+++ b/src/jit/protononjit/SOURCES
@@ -0,0 +1,10 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+TARGETTYPE=NOTARGET
+CLR_TARGETTYPE=DLL
+MSBuildProjectFile=protononjit.nativeproj
+SOURCES=
+ \ No newline at end of file
diff --git a/src/jit/protononjit/makefile b/src/jit/protononjit/makefile
new file mode 100644
index 0000000000..bf27e8c84b
--- /dev/null
+++ b/src/jit/protononjit/makefile
@@ -0,0 +1,7 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+
+!INCLUDE $(NTMAKEENV)\devdiv.def
diff --git a/src/jit/protononjit/protononjit.def b/src/jit/protononjit/protononjit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/protononjit/protononjit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/protononjit/protononjit.nativeproj b/src/jit/protononjit/protononjit.nativeproj
new file mode 100644
index 0000000000..64df6c5739
--- /dev/null
+++ b/src/jit/protononjit/protononjit.nativeproj
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+
+ <!--
+ PROTO JIT: The purpose of this module is to provide an isolated environment to develop
+ the RyuJIT backend without interfering with the development of the frontend. The
+ idea is to fork codegen and registerfp; that way we leave the PUCLR backend intact so
+ it can still be consumed by the RyuJIT frontend separately, maintaining the code stability
+ of the PUCLR codegen.cpp logic.
+
+ This module is meant to be used as a throwaway or fallback cross-JIT (x86 -> arm) that will just
+ attempt to generate arm code, throw it away and then re-jit using the default jit on x86.
+ -->
+
+ <!--
+ Note that we are defining TargetArch directly because the altjit is not a real, fully functional
+ cross-compiled binary. It is just a convenience workaround for JIT devs.
+ -->
+ <PropertyGroup>
+ <TargetArch>arm</TargetArch>
+ </PropertyGroup>
+
+ <!-- Import the CLR's settings -->
+
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+
+ <!-- Set the output -->
+
+ <OutputName>protononjit</OutputName>
+ <FeatureMergeJitAndEngine>false</FeatureMergeJitAndEngine>
+ <TargetType>DYNLINK</TargetType>
+ <BuildCoreBinaries>false</BuildCoreBinaries>
+ <BuildSysBinaries>false</BuildSysBinaries>
+
+ <!-- Motherhood & apple pie here -->
+
+ <DllEntryPoint>_DllMainCRTStartup</DllEntryPoint>
+ <LinkSubsystem>windows</LinkSubsystem>
+ <LibCLib Condition="'$(FeatureMergeJitAndEngine)'!='true'">$(ClrCrtLib)</LibCLib>
+
+ <!-- JIT specific baloney -->
+
+ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
+
+ <ClDefines>$(ClDefines);_TARGET_ARM_=1</ClDefines>
+ <ClDefines>$(ClDefines);ALT_JIT</ClDefines>
+
+ <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
+ <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
+
+ <!-- Profile-guided optimization -->
+
+ <PogoOptimize>false</PogoOptimize>
+ <PogoInstrument>false</PogoInstrument>
+ <PogoUpdate>false</PogoUpdate>
+
+ <!-- Do we want to build with msvcdis disassembly? This should be enabled for DEBUG, disabled otherwise.
+ However, it can be useful to enable it temporarily in non-DEBUG builds, by changing the EnableLateDisasm property.
+ -->
+ <EnableLateDisasm>false</EnableLateDisasm>
+ <ClDefines Condition="'$(EnableLateDisasm)' == 'true'">$(ClDefines);LATE_DISASM=1</ClDefines>
+ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad>
+ <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib>
+
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+ <ItemGroup>
+ <ProjectReference Include="$(ClrSrcDirectory)utilcode\dyncrt\dyncrt.nativeproj" />
+ <TargetLib Include="$(SdkLibPath)\mscoree.lib" />
+ <TargetLib Include="$(ClrLibPath)\ArmGCInfo.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcinfo\armlib\ArmGCInfo.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(UseDelayimpLib)' == 'true'" Include="$(ClrLibPath)\delayimp.lib">
+ <ProjectReference>$(ClrSrcDirectory)delayimp\delayimp.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(SdkLibPath)\ole32.lib" />
+ <TargetLib Condition="'$(EnableLateDisasm)' == 'true'" Include="$(VCToolsLibPath)\msvcdis.lib" />
+ <RCResourceFile Include="..\native.rc" />
+ </ItemGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
index 8d16cce31a..91ae81e322 100644
--- a/src/jit/rangecheck.cpp
+++ b/src/jit/rangecheck.cpp
@@ -506,7 +506,7 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP ass
{
index++;
- Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion((Compiler::AssertionIndex)index);
+ Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion((AssertionIndex)index);
// Current assertion is about array length.
if (!curAssertion->IsArrLenArithBound() && !curAssertion->IsArrLenBound() && !curAssertion->IsConstantBound())
@@ -517,7 +517,7 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP ass
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ m_pCompiler->optPrintAssertion(curAssertion, (AssertionIndex)index);
}
#endif
@@ -617,7 +617,7 @@ void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP ass
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ m_pCompiler->optPrintAssertion(curAssertion, (AssertionIndex)index);
}
#endif
@@ -869,10 +869,13 @@ Range RangeCheck::ComputeRangeForLocalDef(
case GT_ASG:
{
Range range = GetRange(loc->block, loc->stmt, asg->gtGetOp2(), path, monotonic DEBUGARG(indent));
- JITDUMP("Merge assertions from BB%02d:%s for assignment about %p\n", block->bbNum,
- BitVecOps::ToString(m_pCompiler->apTraits, block->bbAssertionIn), dspPtr(asg->gtGetOp1()));
- MergeEdgeAssertions(asg->gtGetOp1(), block->bbAssertionIn, &range);
- JITDUMP("done merging\n");
+ if (!BitVecOps::MayBeUninit(block->bbAssertionIn))
+ {
+ JITDUMP("Merge assertions from BB%02d:%s for assignment about %p\n", block->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, block->bbAssertionIn), dspPtr(asg->gtGetOp1()));
+ MergeEdgeAssertions(asg->gtGetOp1(), block->bbAssertionIn, &range);
+ JITDUMP("done merging\n");
+ }
return range;
}
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 00e0bec6f7..1bc3a614a5 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -732,6 +732,35 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
{
RewriteSIMDOperand(use, false);
}
+ else
+ {
+ // Due to promotion of structs containing fields of type struct with a
+ // single scalar type field, we could potentially see IR nodes of the
+ // form GT_IND(GT_ADD(lclvarAddr, 0)) where 0 is an offset representing
+ // a field-seq. These get folded here.
+ //
+ // TODO: This code can be removed once JIT implements recursive struct
+ // promotion instead of lying about the type of a struct field as the type
+ // of its single scalar field.
+ GenTree* addr = node->AsIndir()->Addr();
+ if (addr->OperGet() == GT_ADD && addr->gtGetOp1()->OperGet() == GT_LCL_VAR_ADDR &&
+ addr->gtGetOp2()->IsIntegralConst(0))
+ {
+ GenTreeLclVarCommon* lclVarNode = addr->gtGetOp1()->AsLclVarCommon();
+ unsigned lclNum = lclVarNode->GetLclNum();
+ LclVarDsc* varDsc = comp->lvaTable + lclNum;
+ if (node->TypeGet() == varDsc->TypeGet())
+ {
+ JITDUMP("Rewriting GT_IND(GT_ADD(LCL_VAR_ADDR,0)) to LCL_VAR\n");
+ lclVarNode->SetOper(GT_LCL_VAR);
+ lclVarNode->gtType = node->TypeGet();
+ use.ReplaceWith(comp, lclVarNode);
+ BlockRange().Remove(addr);
+ BlockRange().Remove(addr->gtGetOp2());
+ BlockRange().Remove(node);
+ }
+ }
+ }
break;
case GT_NOP:
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 5c3895b4f2..938f8e8124 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -4613,7 +4613,7 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
assert(!args->IsArgPlaceHolderNode()); // No place holders nodes are in gtCallLateArgs;
- fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree, args);
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree->AsCall(), args);
assert(curArgTabEntry);
regNumber regNum = curArgTabEntry->regNum; // first register use to pass this argument
diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp
index ed71886cae..68f3bb6c4e 100644
--- a/src/jit/registerfp.cpp
+++ b/src/jit/registerfp.cpp
@@ -637,7 +637,7 @@ void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* p
else
{
assert(oper == GT_CALL);
- genCodeForCall(tree, true);
+ genCodeForCall(tree->AsCall(), true);
}
}
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp
index 0d0ac3e0ce..dbdf9c8aeb 100644
--- a/src/jit/regset.cpp
+++ b/src/jit/regset.cpp
@@ -1735,13 +1735,12 @@ void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 *
*
* Spill the top of the FP x87 stack.
*/
-void RegSet::rsSpillFPStack(GenTreePtr tree)
+void RegSet::rsSpillFPStack(GenTreeCall* call)
{
SpillDsc* spill;
TempDsc* temp;
- var_types treeType = tree->TypeGet();
+ var_types treeType = call->TypeGet();
- assert(tree->OperGet() == GT_CALL);
spill = SpillDsc::alloc(m_rsCompiler, this, treeType);
/* Grab a temp to store the spilled value */
@@ -1750,10 +1749,10 @@ void RegSet::rsSpillFPStack(GenTreePtr tree)
/* Remember what it is we have spilled */
- spill->spillTree = tree;
+ spill->spillTree = call;
SpillDsc* lastDsc = spill;
- regNumber reg = tree->gtRegNum;
+ regNumber reg = call->gtRegNum;
lastDsc->spillNext = rsSpillDesc[reg];
rsSpillDesc[reg] = spill;
@@ -1766,7 +1765,7 @@ void RegSet::rsSpillFPStack(GenTreePtr tree)
/* Mark the tree node as having been spilled */
- rsMarkSpill(tree, reg);
+ rsMarkSpill(call, reg);
}
#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
diff --git a/src/jit/regset.h b/src/jit/regset.h
index cdfbb1502a..9af5200290 100644
--- a/src/jit/regset.h
+++ b/src/jit/regset.h
@@ -337,7 +337,7 @@ private:
void rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx = 0);
#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
- void rsSpillFPStack(GenTreePtr tree);
+ void rsSpillFPStack(GenTreeCall* call);
#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
#ifdef LEGACY_BACKEND
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index fb190c4fa1..4ba7832cca 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -1374,20 +1374,22 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
#ifdef _TARGET_XARCH_
// SSE2 has direct support for float/double/signed word/unsigned byte.
+ // SSE4.1 has direct support for int32/uint32/signed byte/unsigned word.
// For other integer types we compute min/max as follows
//
- // int32/uint32/int64/uint64:
+ // int32/uint32 (SSE2)
+ // int64/uint64 (SSE2&SSE3_4):
// compResult = (op1 < op2) in case of Min
// (op1 > op2) in case of Max
// Min/Max(op1, op2) = Select(compResult, op1, op2)
//
- // unsigned word:
+ // unsigned word (SSE2):
// op1 = op1 - 2^15 ; to make it fit within a signed word
// op2 = op2 - 2^15 ; to make it fit within a signed word
// result = SSE2 signed word Min/Max(op1, op2)
// result = result + 2^15 ; readjust it back
//
- // signed byte:
+ // signed byte (SSE2):
// op1 = op1 + 2^7 ; to make it unsigned
// op2 = op2 + 2^7 ; to make it unsigned
// result = SSE2 unsigned byte Min/Max(op1, op2)
@@ -1395,13 +1397,16 @@ GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
GenTree* simdTree = nullptr;
- if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE)
+ if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE ||
+ (getSIMDInstructionSet() >= InstructionSet_SSE3_4 &&
+ (baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_CHAR)))
{
- // SSE2 has direct support
+ // SSE2 or SSE4.1 has direct support
simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size);
}
else if (baseType == TYP_CHAR || baseType == TYP_BYTE)
{
+ assert(getSIMDInstructionSet() == InstructionSet_SSE2);
int constVal;
SIMDIntrinsicID operIntrinsic;
SIMDIntrinsicID adjustIntrinsic;
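The comment block above describes how targets without a direct min/max instruction for a given element type synthesize it: bias the operands into a range the available instruction handles, take the min/max there, then undo the bias. A scalar C++ check of the two bias tricks (unsigned word via a signed-word min, signed byte via an unsigned-byte min), independent of any SIMD intrinsics:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>

// Unsigned 16-bit min computed with only a *signed* 16-bit min, by sliding the
// operands down by 2^15 and back up afterwards (the SSE2 pminsw trick).
uint16_t UnsignedWordMin(uint16_t a, uint16_t b)
{
    int16_t a2 = (int16_t)(a - 0x8000);
    int16_t b2 = (int16_t)(b - 0x8000);
    int16_t m  = std::min(a2, b2);
    return (uint16_t)(m + 0x8000);
}

// Signed 8-bit min computed with only an *unsigned* 8-bit min, by sliding the
// operands up by 2^7 and back down afterwards (the SSE2 pminub trick).
int8_t SignedByteMin(int8_t a, int8_t b)
{
    uint8_t a2 = (uint8_t)(a + 0x80);
    uint8_t b2 = (uint8_t)(b + 0x80);
    uint8_t m  = std::min(a2, b2);
    return (int8_t)(m - 0x80);
}

int main()
{
    assert(UnsignedWordMin(0xFFFF, 1) == 1);         // wrong under a plain signed compare
    assert(UnsignedWordMin(0x8000, 0x7FFF) == 0x7FFF);
    assert(SignedByteMin(-5, 3) == -5);              // wrong under a plain unsigned compare
    assert(SignedByteMin(-128, 127) == -128);
    return 0;
}
```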
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index ace36422fb..468d302d17 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -243,6 +243,25 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pminsw;
}
+ else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ {
+ if (baseType == TYP_BYTE)
+ {
+ result = INS_pminsb;
+ }
+ else if (baseType == TYP_CHAR)
+ {
+ result = INS_pminuw;
+ }
+ else if (baseType == TYP_INT)
+ {
+ result = INS_pminsd;
+ }
+ else if (baseType == TYP_UINT)
+ {
+ result = INS_pminud;
+ }
+ }
else
{
unreached();
@@ -266,6 +285,25 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
{
result = INS_pmaxsw;
}
+ else if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ {
+ if (baseType == TYP_BYTE)
+ {
+ result = INS_pmaxsb;
+ }
+ else if (baseType == TYP_CHAR)
+ {
+ result = INS_pmaxuw;
+ }
+ else if (baseType == TYP_INT)
+ {
+ result = INS_pmaxsd;
+ }
+ else if (baseType == TYP_UINT)
+ {
+ result = INS_pmaxud;
+ }
+ }
else
{
unreached();
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
index 3e0eceabb7..e6d4c9e9c9 100644
--- a/src/jit/stackfp.cpp
+++ b/src/jit/stackfp.cpp
@@ -2595,7 +2595,7 @@ void CodeGen::genCodeForTreeStackFP_Special(GenTreePtr tree)
{
case GT_CALL:
{
- genCodeForCall(tree, true);
+ genCodeForCall(tree->AsCall(), true);
break;
}
default:
diff --git a/src/jit/standalone/CMakeLists.txt b/src/jit/standalone/CMakeLists.txt
index f20d3790c7..988108efb1 100644
--- a/src/jit/standalone/CMakeLists.txt
+++ b/src/jit/standalone/CMakeLists.txt
@@ -16,6 +16,7 @@ endif(WIN32)
add_library_clr(clrjit
SHARED
${SHARED_LIB_SOURCES}
+ ${JIT_ARCH_SOURCES}
)
add_dependencies(clrjit jit_exports)
diff --git a/src/jit/target.h b/src/jit/target.h
index 5b608ddfac..f62d90519b 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -6,25 +6,11 @@
#ifndef _TARGET_H_
#define _TARGET_H_
-// If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
-#if defined(UNIX_AMD64_ABI)
-#if !defined(_TARGET_AMD64_)
-#error When UNIX_AMD64_ABI is defined you must define _TARGET_AMD64_ defined as well.
-#endif
-#endif
-
-// If the UNIX_X86_ABI is defined make sure that _TARGET_X86_ is also defined.
-#if defined(UNIX_X86_ABI)
-#if !defined(_TARGET_X86_)
-#error When UNIX_X86_ABI is defined you must define _TARGET_X86_ defined as well.
-#endif
-#endif
-
-#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#if defined(FEATURE_CORECLR) && defined(_TARGET_UNIX_)
#define FEATURE_VARARG 0
-#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#else // !(defined(FEATURE_CORECLR) && defined(_TARGET_UNIX_))
#define FEATURE_VARARG 1
-#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#endif // !(defined(FEATURE_CORECLR) && defined(_TARGET_UNIX_))
/*****************************************************************************/
// The following are human readable names for the target architectures
@@ -1080,10 +1066,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_ARG_4 REG_R8
#define REG_ARG_5 REG_R9
- SELECTANY const regNumber intArgRegs[] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
- SELECTANY const regMaskTP intArgMasks[] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
- SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
- SELECTANY const regMaskTP fltArgMasks[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
+ SELECTANY const regNumber intArgRegs [] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
+ SELECTANY const regMaskTP intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 };
+ SELECTANY const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
+ SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 };
#define RBM_ARG_0 RBM_RDI
#define RBM_ARG_1 RBM_RSI
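The intArgMasks/fltArgMasks fix above matters because REG_* and RBM_* live in different domains: one is a register number, the other a one-bit-per-register mask (conventionally RBM_x == 1 << REG_x), so initializing a mask table with register numbers yields wrong sets. A hedged sketch of the distinction using invented constants (not the JIT's real values):

```cpp
#include <cassert>
#include <cstdint>

// Invented example values -- the real REG_*/RBM_* constants live in target.h.
enum RegNumber : unsigned
{
    REG_EDI = 5,
    REG_ESI = 6,
};

using RegMask = uint64_t;

constexpr RegMask MaskOf(RegNumber reg)
{
    return RegMask(1) << reg; // the conventional RBM_x == 1 << REG_x relationship
}

constexpr RegMask RBM_EDI = MaskOf(REG_EDI);
constexpr RegMask RBM_ESI = MaskOf(REG_ESI);

int main()
{
    // Storing a register *number* where a *mask* is expected silently builds a
    // wrong set: REG_EDI (5) as a mask reads as two unrelated low registers.
    RegMask wrong = REG_EDI;
    RegMask right = RBM_EDI;

    assert(right == (RegMask(1) << 5));
    assert(wrong != right);
    assert((RBM_EDI | RBM_ESI) == 0x60); // bits 5 and 6: a proper two-register set
    return 0;
}
```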
@@ -1103,9 +1089,9 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_ARG_2 REG_R8
#define REG_ARG_3 REG_R9
- SELECTANY const regNumber intArgRegs[] = { REG_ECX, REG_EDX, REG_R8, REG_R9 };
+ SELECTANY const regNumber intArgRegs [] = { REG_ECX, REG_EDX, REG_R8, REG_R9 };
SELECTANY const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 };
- SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };
+ SELECTANY const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };
SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
#define RBM_ARG_0 RBM_ECX
@@ -1182,7 +1168,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
// TODO-ARM-CQ: Check for sdiv/udiv at runtime and generate it if available
#define USE_HELPERS_FOR_INT_DIV 1 // BeagleBoard (ARMv7A) doesn't support SDIV/UDIV
#define CPU_LOAD_STORE_ARCH 1
+#ifdef LEGACY_BACKEND
#define CPU_LONG_USES_REGPAIR 1
+#else
+ #define CPU_LONG_USES_REGPAIR 0
+#endif
#define CPU_HAS_FP_SUPPORT 1
#define ROUND_FLOAT 0 // Do not round intermed float expression results
#define CPU_HAS_BYTE_REGS 0
@@ -1242,7 +1232,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+#ifdef LEGACY_BACKEND
#define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR)
+#else
+ #define RBM_CALLEE_TRASH_NOGC RBM_CALLEE_TRASH
+#endif
#define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
@@ -1382,6 +1376,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_VIRTUAL_STUB_PARAM RBM_R4
#define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R4
+ // R2R indirect call. Use the same registers as VSD
+ #define REG_R2R_INDIRECT_PARAM REG_R4
+ #define RBM_R2R_INDIRECT_PARAM RBM_R4
+
// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_R4
#define RBM_PINVOKE_FRAME RBM_R4
@@ -1424,6 +1422,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_INTRET RBM_R0
#define REG_LNGRET REG_PAIR_R0R1
#define RBM_LNGRET (RBM_R1|RBM_R0)
+ #define REG_LNGRET_LO REG_R0
+ #define REG_LNGRET_HI REG_R1
+ #define RBM_LNGRET_LO RBM_R0
+ #define RBM_LNGRET_HI RBM_R1
#define REG_FLOATRET REG_F0
#define RBM_FLOATRET RBM_F0
@@ -1434,7 +1436,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
// The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
- #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+ #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_CALLEE_TRASH | RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH)
#define REG_FPBASE REG_R11
#define RBM_FPBASE RBM_R11
@@ -1668,7 +1670,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
// R2R indirect call. Use the same registers as VSD
#define REG_R2R_INDIRECT_PARAM REG_R11
#define RBM_R2R_INDIRECT_PARAM RBM_R11
- #define PREDICT_REG_RER_INDIRECT_PARAM PREDICT_REG_R11
// Registers used by PInvoke frame setup
#define REG_PINVOKE_FRAME REG_R9
diff --git a/src/jit/unwind.h b/src/jit/unwind.h
index 27d23b1b54..c74ee2d1f3 100644
--- a/src/jit/unwind.h
+++ b/src/jit/unwind.h
@@ -321,7 +321,12 @@ class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase
public:
UnwindEpilogCodes(Compiler* comp)
- : UnwindBase(comp), uecMem(uecMemLocal), uecMemSize(UEC_LOCAL_COUNT), uecCodeSlot(-1), uecFinalized(false)
+ : UnwindBase(comp)
+ , uecMem(uecMemLocal)
+ , firstByteOfLastCode(0)
+ , uecMemSize(UEC_LOCAL_COUNT)
+ , uecCodeSlot(-1)
+ , uecFinalized(false)
{
}
@@ -332,12 +337,16 @@ public:
virtual void AddCode(BYTE b1)
{
AppendByte(b1);
+
+ firstByteOfLastCode = b1;
}
virtual void AddCode(BYTE b1, BYTE b2)
{
AppendByte(b1);
AppendByte(b2);
+
+ firstByteOfLastCode = b1;
}
virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)
@@ -345,6 +354,8 @@ public:
AppendByte(b1);
AppendByte(b2);
AppendByte(b3);
+
+ firstByteOfLastCode = b1;
}
virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
@@ -353,6 +364,8 @@ public:
AppendByte(b2);
AppendByte(b3);
AppendByte(b4);
+
+ firstByteOfLastCode = b1;
}
// Return a pointer to the first unwind code byte
@@ -406,11 +419,13 @@ public:
{
assert(!uecFinalized);
noway_assert(0 <= uecCodeSlot && uecCodeSlot < uecMemSize); // There better be at least one code!
- BYTE lastCode = uecMem[uecCodeSlot];
- if (!IsEndCode(lastCode)) // If the last code is an end code, we don't need to append one.
+
+ if (!IsEndCode(firstByteOfLastCode)) // If the last code is an end code, we don't need to append one.
{
- AppendByte(UWC_END); // Add a default "end" code to the end of the array of unwind codes
+ AppendByte(UWC_END); // Add a default "end" code to the end of the array of unwind codes
+ firstByteOfLastCode = UWC_END; // Update firstByteOfLastCode in case we use it later
}
+
uecFinalized = true; // With the "end" code in place, now we're done
#ifdef DEBUG
@@ -445,6 +460,7 @@ private:
// If there are more unwind codes, we dynamically allocate memory.
BYTE uecMemLocal[UEC_LOCAL_COUNT];
BYTE* uecMem;
+ BYTE firstByteOfLastCode;
// uecMemSize is the number of bytes/slots in uecMem. This is equal to UEC_LOCAL_COUNT unless
// we've dynamically allocated memory to store the codes.
diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp
index 14eba8cb50..1b2baf6584 100644
--- a/src/jit/unwindamd64.cpp
+++ b/src/jit/unwindamd64.cpp
@@ -448,7 +448,7 @@ void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset)
func->unwindHeader.FrameRegister = (BYTE)reg;
-#ifdef PLATFORM_UNIX
+#ifdef UNIX_AMD64_ABI
if (offset > 240)
{
// On Unix only, we have a CLR-only extension to the AMD64 unwind codes: UWOP_SET_FPREG_LARGE.
@@ -467,7 +467,7 @@ void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset)
func->unwindHeader.FrameOffset = 15;
}
else
-#endif // PLATFORM_UNIX
+#endif // UNIX_AMD64_ABI
{
assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
@@ -697,7 +697,7 @@ void DumpUnwindInfo(bool isHotCode,
pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo); // This should be zero
break;
-#ifdef PLATFORM_UNIX
+#ifdef UNIX_AMD64_ABI
case UWOP_SET_FPREG_LARGE:
printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SET_FPREG_LARGE (%u) OpInfo: Unused (%u)\n",
@@ -712,7 +712,7 @@ void DumpUnwindInfo(bool isHotCode,
}
break;
-#endif // PLATFORM_UNIX
+#endif // UNIX_AMD64_ABI
case UWOP_SAVE_NONVOL:
printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_NONVOL (%u) OpInfo: %s (%u)\n",
@@ -858,7 +858,7 @@ void Compiler::unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode)
#ifdef UNIX_AMD64_ABI
if (generateCFIUnwindCodes())
{
- unwindCodeBytes = func->cfiCodes->size() * sizeof(CFI_CODE);
+ unwindCodeBytes = (DWORD)(func->cfiCodes->size() * sizeof(CFI_CODE));
}
else
#endif // UNIX_AMD64_ABI
@@ -956,7 +956,7 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo
#ifdef UNIX_AMD64_ABI
if (generateCFIUnwindCodes())
{
- int size = func->cfiCodes->size();
+ DWORD size = (DWORD)func->cfiCodes->size();
if (size > 0)
{
unwindCodeBytes = size * sizeof(CFI_CODE);
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
index 3a45039aa7..9fbe394a21 100644
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -25,13 +25,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// same code for all platforms, hence it is here instead of in the targetXXX.cpp
// files.
-#ifdef PLATFORM_UNIX
+#ifdef _TARGET_UNIX_
// Should we distinguish Mac? Can we?
// Should we distinguish flavors of Unix? Can we?
const char* Target::g_tgtPlatformName = "Unix";
-#else // !PLATFORM_UNIX
+#else // !_TARGET_UNIX_
const char* Target::g_tgtPlatformName = "Windows";
-#endif // !PLATFORM_UNIX
+#endif // !_TARGET_UNIX_
/*****************************************************************************/
@@ -698,18 +698,24 @@ const char* refCntWtd2str(unsigned refCntWtd)
nump = (nump == num1) ? num2 : num1;
- unsigned valueInt = refCntWtd / BB_UNITY_WEIGHT;
- unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
-
- if (valueFrac == 0)
+ if (refCntWtd == BB_MAX_WEIGHT)
{
- sprintf_s(temp, bufSize, "%2u ", valueInt);
+ sprintf_s(temp, bufSize, "MAX ");
}
else
{
- sprintf_s(temp, bufSize, "%2u.%1u", valueInt, (valueFrac * 10 / BB_UNITY_WEIGHT));
- }
+ unsigned valueInt = refCntWtd / BB_UNITY_WEIGHT;
+ unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
+ if (valueFrac == 0)
+ {
+ sprintf_s(temp, bufSize, "%u ", valueInt);
+ }
+ else
+ {
+ sprintf_s(temp, bufSize, "%u.%02u", valueInt, (valueFrac * 100 / BB_UNITY_WEIGHT));
+ }
+ }
return temp;
}
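The reworked refCntWtd2str prints BB_MAX_WEIGHT as "MAX" and otherwise formats the weighted count as an integer part plus a two-digit fraction of a unity-weight block. A small sketch of that fixed-point formatting, assuming BB_UNITY_WEIGHT is 100 (an assumption made only for this example):

```cpp
#include <cassert>
#include <cstdio>
#include <cstring>

// Assumed for the example; the real BB_UNITY_WEIGHT constant lives in the JIT headers.
constexpr unsigned BB_UNITY_WEIGHT = 100;

// Mirrors the new non-MAX formatting path: integer part plus a two-digit fraction.
void FormatWeight(char (&buf)[16], unsigned refCntWtd)
{
    unsigned valueInt  = refCntWtd / BB_UNITY_WEIGHT;
    unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
    if (valueFrac == 0)
    {
        snprintf(buf, sizeof(buf), "%u ", valueInt);
    }
    else
    {
        snprintf(buf, sizeof(buf), "%u.%02u", valueInt, (valueFrac * 100 / BB_UNITY_WEIGHT));
    }
}

int main()
{
    char buf[16];
    FormatWeight(buf, 250); // 2.5 blocks' worth of weighted references
    assert(strcmp(buf, "2.50") == 0);
    FormatWeight(buf, 300);
    assert(strcmp(buf, "3 ") == 0);
    return 0;
}
```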
@@ -780,7 +786,7 @@ void ConfigMethodRange::InitRanges(const wchar_t* rangeStr, unsigned capacity)
}
// Allocate some persistent memory
- ICorJitHost* jitHost = JitHost::getJitHost();
+ ICorJitHost* jitHost = g_jitHost;
m_ranges = (Range*)jitHost->allocateMemory(capacity * sizeof(Range));
m_entries = capacity;
@@ -1358,6 +1364,7 @@ void HelperCallProperties::init()
case CORINFO_HELP_ISINSTANCEOFCLASS:
case CORINFO_HELP_ISINSTANCEOFANY:
case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
+ case CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE:
isPure = true;
noThrow = true; // These return null for a failing cast
@@ -1411,9 +1418,7 @@ void HelperCallProperties::init()
case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
case CORINFO_HELP_READYTORUN_STATIC_BASE:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE:
-#endif // COR_JIT_EE_VERSION > 460
// These may invoke static class constructors
// These can throw InvalidProgram exception if the class can not be constructed
@@ -1463,9 +1468,7 @@ void HelperCallProperties::init()
case CORINFO_HELP_VERIFICATION:
case CORINFO_HELP_RNGCHKFAIL:
case CORINFO_HELP_THROWDIVZERO:
-#if COR_JIT_EE_VERSION > 460
case CORINFO_HELP_THROWNULLREF:
-#endif // COR_JIT_EE_VERSION
case CORINFO_HELP_THROW:
case CORINFO_HELP_RETHROW:
@@ -1747,7 +1750,7 @@ double FloatingPointUtils::round(double x)
{
// If the number has no fractional part do nothing
// This shortcut is necessary to work around precision loss in borderline cases on some platforms
- if (x == ((double)((__int64)x)))
+ if (x == (double)((INT64)x))
{
return x;
}
@@ -1765,3 +1768,43 @@ double FloatingPointUtils::round(double x)
return _copysign(flrTempVal, x);
}
+
+// Windows x86 and Windows ARM/ARM64 may not define _copysignf() but they do define _copysign().
+// We will redirect the macro to this other function if the macro is not defined for the platform.
+// This has the side effect of a possible implicit upcasting for arguments passed in and an explicit
+// downcasting for the _copysign() call.
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)) && !defined(FEATURE_PAL)
+
+#if !defined(_copysignf)
+#define _copysignf (float)_copysign
+#endif
+
+#endif
+
+// Rounds a single-precision floating-point value to the nearest integer,
+// and rounds midpoint values to the nearest even number.
+// Note this should align with classlib in floatsingle.cpp
+// Specializing for x86 using an x87 instruction is optional since
+// the outcome is identical across targets.
+float FloatingPointUtils::round(float x)
+{
+ // If the number has no fractional part do nothing
+ // This shortcut is necessary to work around precision loss in borderline cases on some platforms
+ if (x == (float)((INT32)x))
+ {
+ return x;
+ }
+
+ // We had a number that was equally close to 2 integers.
+ // We need to return the even one.
+
+ float tempVal = (x + 0.5f);
+ float flrTempVal = floorf(tempVal);
+
+ if ((flrTempVal == tempVal) && (fmodf(tempVal, 2.0f) != 0))
+ {
+ flrTempVal -= 1.0f;
+ }
+
+ return _copysignf(flrTempVal, x);
+}
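The new float overload mirrors the double path: add 0.5, floor, and back off by one when the result lands exactly on an odd integer, which produces round-half-to-even. A standalone restatement of that logic (renamed here, using the standard copysignf rather than the _copysignf redirect) with the midpoint cases checked:

```cpp
#include <cassert>
#include <cmath>

// Standalone re-statement of the rounding logic added above; illustrative only.
float RoundHalfToEven(float x)
{
    if (x == (float)((int)x)) // no fractional part: nothing to do
    {
        return x;
    }

    float tempVal    = x + 0.5f;
    float flrTempVal = floorf(tempVal);

    // Exactly halfway between two integers: step back to the even one.
    if ((flrTempVal == tempVal) && (fmodf(tempVal, 2.0f) != 0))
    {
        flrTempVal -= 1.0f;
    }
    return copysignf(flrTempVal, x);
}

int main()
{
    assert(RoundHalfToEven(2.5f) == 2.0f);  // midpoint rounds to even
    assert(RoundHalfToEven(3.5f) == 4.0f);
    assert(RoundHalfToEven(-2.5f) == -2.0f);
    assert(RoundHalfToEven(2.6f) == 3.0f);  // non-midpoint rounds normally
    return 0;
}
```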
diff --git a/src/jit/utils.h b/src/jit/utils.h
index 1cd35903dd..b41cf84a1e 100644
--- a/src/jit/utils.h
+++ b/src/jit/utils.h
@@ -381,6 +381,15 @@ public:
return m_value;
}
+ // Mark the value as read only; explicitly change the variable to the "read" phase.
+ void MarkAsReadOnly() const
+ {
+#ifdef DEBUG
+ assert(m_initialized);
+ (const_cast<PhasedVar*>(this))->m_writePhase = false;
+#endif // DEBUG
+ }
+
// Functions/operators to write the value. Must be in the write phase.
PhasedVar& operator=(const T& value)
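MarkAsReadOnly gives callers a way to end a PhasedVar's write phase explicitly instead of waiting for the first read. A rough sketch of the phased-variable idea as the surrounding declarations suggest it (simplified, with the DEBUG-only bookkeeping kept unconditional for illustration):

```cpp
#include <cassert>

// Minimal sketch of the PhasedVar idea: writes are only legal during the
// "write phase"; MarkAsReadOnly() ends that phase. Details are illustrative.
template <typename T>
class PhasedVarSketch
{
    T    m_value{};
    bool m_initialized = false;
    bool m_writePhase  = true;

public:
    PhasedVarSketch& operator=(const T& value)
    {
        assert(m_writePhase); // writes must happen before the read phase starts
        m_value       = value;
        m_initialized = true;
        return *this;
    }

    void MarkAsReadOnly()
    {
        assert(m_initialized);
        m_writePhase = false;
    }

    operator T() const
    {
        return m_value;
    }
};

int main()
{
    PhasedVarSketch<int> frameSize;
    frameSize = 128;          // still in the write phase
    frameSize.MarkAsReadOnly();
    assert(frameSize == 128); // reads are fine; further writes would assert
    return 0;
}
```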
@@ -638,6 +647,8 @@ public:
static unsigned __int64 convertDoubleToUInt64(double d);
static double round(double x);
+
+ static float round(float x);
};
// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index aba29c4411..03bc204070 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -32,7 +32,7 @@ VNFunc GetVNFuncForOper(genTreeOps oper, bool isUnsigned)
case GT_LE:
return VNF_LE_UN;
case GT_GE:
- return VNF_GT_UN;
+ return VNF_GE_UN;
case GT_GT:
return VNF_GT_UN;
case GT_ADD:
@@ -206,6 +206,52 @@ T ValueNumStore::EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
}
}
+struct FloatTraits
+{
+ static float NaN()
+ {
+ unsigned bits = 0xFFC00000u;
+ float result;
+ static_assert(sizeof(bits) == sizeof(result), "sizeof(unsigned) must equal sizeof(float)");
+ memcpy(&result, &bits, sizeof(result));
+ return result;
+ }
+};
+
+struct DoubleTraits
+{
+ static double NaN()
+ {
+ unsigned long long bits = 0xFFF8000000000000ull;
+ double result;
+ static_assert(sizeof(bits) == sizeof(result), "sizeof(unsigned long long) must equal sizeof(double)");
+ memcpy(&result, &bits, sizeof(result));
+ return result;
+ }
+};
+
+template <typename TFp, typename TFpTraits>
+TFp FpRem(TFp dividend, TFp divisor)
+{
+ // From the ECMA standard:
+ //
+ // If [divisor] is zero or [dividend] is infinity
+ // the result is NaN.
+ // If [divisor] is infinity,
+ // the result is [dividend]
+
+ if (divisor == 0 || !_finite(dividend))
+ {
+ return TFpTraits::NaN();
+ }
+ else if (!_finite(divisor) && !_isnan(divisor))
+ {
+ return dividend;
+ }
+
+ return (TFp)fmod((double)dividend, (double)divisor);
+}
+
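FpRem pins down the ECMA rem edge cases that a bare fmod call does not make explicit: a zero divisor or an infinite dividend produces NaN, and an infinite divisor returns the dividend unchanged. A portable restatement using <cmath> classification functions (the diff itself relies on the MSVC-style _finite/_isnan helpers):

```cpp
#include <cassert>
#include <cmath>
#include <limits>

// Portable restatement of the FpRem rules shown above; illustrative only.
double FpRemSketch(double dividend, double divisor)
{
    if (divisor == 0.0 || !std::isfinite(dividend))
    {
        return std::numeric_limits<double>::quiet_NaN();
    }
    if (std::isinf(divisor))
    {
        return dividend; // x rem +/-inf == x
    }
    return fmod(dividend, divisor);
}

int main()
{
    const double inf = std::numeric_limits<double>::infinity();
    assert(std::isnan(FpRemSketch(1.0, 0.0))); // zero divisor -> NaN
    assert(std::isnan(FpRemSketch(inf, 2.0))); // infinite dividend -> NaN
    assert(FpRemSketch(5.0, inf) == 5.0);      // infinite divisor -> dividend
    assert(FpRemSketch(7.5, 2.0) == 1.5);      // otherwise plain fmod
    return 0;
}
```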
// Specialize for double for floating operations, that doesn't involve unsigned.
template <>
double ValueNumStore::EvalOp<double>(VNFunc vnf, double v0, double v1, ValueNum* pExcSet)
@@ -223,7 +269,31 @@ double ValueNumStore::EvalOp<double>(VNFunc vnf, double v0, double v1, ValueNum*
case GT_DIV:
return v0 / v1;
case GT_MOD:
- return fmod(v0, v1);
+ return FpRem<double, DoubleTraits>(v0, v1);
+
+ default:
+ unreached();
+ }
+}
+
+// Specialize for float for floating operations, that doesn't involve unsigned.
+template <>
+float ValueNumStore::EvalOp<float>(VNFunc vnf, float v0, float v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_ADD:
+ return v0 + v1;
+ case GT_SUB:
+ return v0 - v1;
+ case GT_MUL:
+ return v0 * v1;
+ case GT_DIV:
+ return v0 / v1;
+ case GT_MOD:
+ return FpRem<float, FloatTraits>(v0, v1);
default:
unreached();
@@ -833,7 +903,7 @@ ValueNum ValueNumStore::VNForHandle(ssize_t cnsVal, unsigned handleFlags)
}
// Returns the value number for zero of the given "typ".
-// It has an unreached() for a "typ" that has no zero value, such as TYP_BYREF.
+// It has an unreached() for a "typ" that has no zero value, such as TYP_VOID.
ValueNum ValueNumStore::VNZeroForType(var_types typ)
{
switch (typ)
@@ -861,6 +931,8 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ)
case TYP_REF:
case TYP_ARRAY:
return VNForNull();
+ case TYP_BYREF:
+ return VNForByrefCon(0);
case TYP_STRUCT:
#ifdef FEATURE_SIMD
// TODO-CQ: Improve value numbering for SIMD types.
@@ -959,6 +1031,17 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN)
}
}
+// Windows x86 and Windows ARM/ARM64 may not define _isnanf() but they do define _isnan().
+// We will redirect the macro to the other function if the macro is not defined for the
+// platform. This has the side effect of a possible implicit upcasting for arguments passed.
+#if (defined(_TARGET_X86_) || defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)) && !defined(FEATURE_PAL)
+
+#if !defined(_isnanf)
+#define _isnanf _isnan
+#endif
+
+#endif
+
ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
{
assert(arg0VN != NoVN && arg1VN != NoVN);
@@ -986,8 +1069,12 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
// We don't try to fold a binary operation when one of the constant operands
// is a floating-point constant and the other is not.
//
- bool arg0IsFloating = varTypeIsFloating(TypeOfVN(arg0VN));
- bool arg1IsFloating = varTypeIsFloating(TypeOfVN(arg1VN));
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ bool arg0IsFloating = varTypeIsFloating(arg0VNtyp);
+
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+ bool arg1IsFloating = varTypeIsFloating(arg1VNtyp);
+
if (arg0IsFloating != arg1IsFloating)
{
canFold = false;
@@ -997,8 +1084,10 @@ ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, V
// comparison would return false, an unordered comparison
// will return true if any operands are a NaN. We only perform
// ordered NaN comparison in EvalComparison.
- if ((arg0IsFloating && _isnan(GetConstantDouble(arg0VN))) ||
- (arg1IsFloating && _isnan(GetConstantDouble(arg1VN))))
+ if ((arg0IsFloating && (((arg0VNtyp == TYP_FLOAT) && _isnanf(GetConstantSingle(arg0VN))) ||
+ ((arg0VNtyp == TYP_DOUBLE) && _isnan(GetConstantDouble(arg0VN))))) ||
+ (arg1IsFloating && (((arg1VNtyp == TYP_FLOAT) && _isnanf(GetConstantSingle(arg1VN))) ||
+ ((arg0VNtyp == TYP_DOUBLE) && _isnan(GetConstantDouble(arg1VN))))))
{
canFold = false;
}
@@ -1607,27 +1696,24 @@ INT64 ValueNumStore::GetConstantInt64(ValueNum argVN)
return result;
}
-// Given a float or a double constant value number return its value as a double.
+// Given a double constant value number return its value as a double.
//
double ValueNumStore::GetConstantDouble(ValueNum argVN)
{
assert(IsVNConstant(argVN));
- var_types argVNtyp = TypeOfVN(argVN);
+ assert(TypeOfVN(argVN) == TYP_DOUBLE);
- double result = 0;
+ return ConstantValue<double>(argVN);
+}
- switch (argVNtyp)
- {
- case TYP_FLOAT:
- result = (double)ConstantValue<float>(argVN);
- break;
- case TYP_DOUBLE:
- result = ConstantValue<double>(argVN);
- break;
- default:
- unreached();
- }
- return result;
+// Given a float constant value number return its value as a float.
+//
+float ValueNumStore::GetConstantSingle(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ assert(TypeOfVN(argVN) == TYP_FLOAT);
+
+ return ConstantValue<float>(argVN);
}
// Compute the proper value number when the VNFunc has all constant arguments
@@ -1796,40 +1882,52 @@ ValueNum ValueNumStore::EvalFuncForConstantFPArgs(var_types typ, VNFunc func, Va
assert(CanEvalForConstantArgs(func));
assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
- // We expect both argument types to be floating point types
+ // We expect both argument types to be floating-point types
var_types arg0VNtyp = TypeOfVN(arg0VN);
var_types arg1VNtyp = TypeOfVN(arg1VN);
assert(varTypeIsFloating(arg0VNtyp));
assert(varTypeIsFloating(arg1VNtyp));
- double arg0Val = GetConstantDouble(arg0VN);
- double arg1Val = GetConstantDouble(arg1VN);
+ // We also expect both arguments to be of the same floating-point type
+ assert(arg0VNtyp == arg1VNtyp);
ValueNum result; // left uninitialized, we are required to initialize it on all paths below.
if (VNFuncIsComparison(func))
{
assert(genActualType(typ) == TYP_INT);
- result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+
+ if (arg0VNtyp == TYP_FLOAT)
+ {
+ result = VNForIntCon(EvalComparison(func, GetConstantSingle(arg0VN), GetConstantSingle(arg1VN)));
+ }
+ else
+ {
+ assert(arg0VNtyp == TYP_DOUBLE);
+ result = VNForIntCon(EvalComparison(func, GetConstantDouble(arg0VN), GetConstantDouble(arg1VN)));
+ }
}
else
{
- assert(varTypeIsFloating(typ)); // We must be computing a floating point result
+ // We expect the return type to be the same as the argument type
+ assert(varTypeIsFloating(typ));
+ assert(arg0VNtyp == typ);
- // We always compute the result using a double
- ValueNum exception = VNForEmptyExcSet();
- double doubleResultVal = EvalOp(func, arg0Val, arg1Val, &exception);
- assert(exception == VNForEmptyExcSet()); // Floating point ops don't throw.
+ ValueNum exception = VNForEmptyExcSet();
if (typ == TYP_FLOAT)
{
- float floatResultVal = float(doubleResultVal);
- result = VNForFloatCon(floatResultVal);
+ float floatResultVal = EvalOp(func, GetConstantSingle(arg0VN), GetConstantSingle(arg1VN), &exception);
+ assert(exception == VNForEmptyExcSet()); // Floating point ops don't throw.
+ result = VNForFloatCon(floatResultVal);
}
else
{
assert(typ == TYP_DOUBLE);
+
+ double doubleResultVal = EvalOp(func, GetConstantDouble(arg0VN), GetConstantDouble(arg1VN), &exception);
+ assert(exception == VNForEmptyExcSet()); // Floating point ops don't throw.
result = VNForDoubleCon(doubleResultVal);
}
}
@@ -1876,6 +1974,7 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu
{
#ifndef _TARGET_64BIT_
case TYP_REF:
+ case TYP_BYREF:
#endif
case TYP_INT:
{
@@ -1934,6 +2033,9 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu
else
return VNForLongCon(INT64(arg0Val));
#endif
+ case TYP_BYREF:
+ assert(typ == TYP_BYREF);
+ return VNForByrefCon((INT64)arg0Val);
case TYP_FLOAT:
assert(typ == TYP_FLOAT);
if (srcIsUnsigned)
@@ -1962,6 +2064,7 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu
{
#ifdef _TARGET_64BIT_
case TYP_REF:
+ case TYP_BYREF:
#endif
case TYP_LONG:
INT64 arg0Val = GetConstantInt64(arg0VN);
@@ -1992,6 +2095,9 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu
case TYP_ULONG:
assert(typ == TYP_LONG);
return arg0VN;
+ case TYP_BYREF:
+ assert(typ == TYP_BYREF);
+ return VNForByrefCon((INT64)arg0Val);
case TYP_FLOAT:
assert(typ == TYP_FLOAT);
if (srcIsUnsigned)
@@ -2017,6 +2123,47 @@ ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, Valu
}
}
case TYP_FLOAT:
+ {
+ float arg0Val = GetConstantSingle(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT32(arg0Val));
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT32(arg0Val));
+ case TYP_LONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(INT64(arg0Val));
+ case TYP_ULONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(UINT64(arg0Val));
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ return VNForFloatCon(arg0Val);
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ return VNForDoubleCon(double(arg0Val));
+ default:
+ unreached();
+ }
+ }
case TYP_DOUBLE:
{
double arg0Val = GetConstantDouble(arg0VN);
@@ -3062,6 +3209,53 @@ void ValueNumStore::GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info)
}
}
+//------------------------------------------------------------------------
+// IsVNArrLenUnsignedBound: Checks if the specified vn represents an expression
+// such as "(uint)i < (uint)a.len" that implies that the array index is valid
+// (0 <= i && i < a.len).
+//
+// Arguments:
+// vn - Value number to query
+// info - Pointer to an ArrLenUnsignedBoundInfo object to return information about
+// the expression. Not populated if the vn expression isn't suitable (e.g. i <= a.len).
+// This enables optCreateJTrueBoundAssertion to immediately create an OAK_NO_THROW
+// assertion instead of the OAK_EQUAL/NOT_EQUAL assertions created by signed compares
+// (IsVNArrLenBound, IsVNArrLenArithBound) that require further processing.
+
+bool ValueNumStore::IsVNArrLenUnsignedBound(ValueNum vn, ArrLenUnsignedBoundInfo* info)
+{
+ VNFuncApp funcApp;
+
+ if (GetVNFunc(vn, &funcApp))
+ {
+ if ((funcApp.m_func == VNF_LT_UN) || (funcApp.m_func == VNF_GE_UN))
+ {
+ // We only care about "(uint)i < (uint)a.len" and its negation "(uint)i >= (uint)a.len"
+ if (IsVNArrLen(funcApp.m_args[1]))
+ {
+ info->vnIdx = funcApp.m_args[0];
+ info->cmpOper = funcApp.m_func;
+ info->vnLen = funcApp.m_args[1];
+ return true;
+ }
+ }
+ else if ((funcApp.m_func == VNF_GT_UN) || (funcApp.m_func == VNF_LE_UN))
+ {
+ // We only care about "(uint)a.len > (uint)i" and its negation "(uint)a.len <= (uint)i"
+ if (IsVNArrLen(funcApp.m_args[0]))
+ {
+ info->vnIdx = funcApp.m_args[1];
+ // Let's keep a consistent operand order - it's always i < a.len, never a.len > i
+ info->cmpOper = (funcApp.m_func == VNF_GT_UN) ? VNF_LT_UN : VNF_GE_UN;
+ info->vnLen = funcApp.m_args[0];
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
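The shape this helper matches comes from the single-compare bounds check: casting both index and length to unsigned lets one compare cover both `0 <= i` and `i < a.len`, because a negative index wraps to a huge unsigned value. A small C++ illustration of why the unsigned compare subsumes both bounds (the managed pattern the JIT sees produces the same VN shape; the array here is only for show):

```cpp
#include <cassert>

// One unsigned compare covers both "i >= 0" and "i < len".
bool InBounds(int i, int len)
{
    return (unsigned)i < (unsigned)len;
}

int main()
{
    int a[4] = {10, 20, 30, 40};
    int len  = 4;

    assert(InBounds(0, len));   // low edge in range
    assert(InBounds(3, len));   // high edge in range
    assert(!InBounds(4, len));  // one past the end
    assert(!InBounds(-1, len)); // negative wraps to 0xFFFFFFFF, so the compare fails

    (void)a;
    return 0;
}
```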
bool ValueNumStore::IsVNArrLenBound(ValueNum vn)
{
// Do we have "var < a.len"?
@@ -3257,48 +3451,103 @@ bool ValueNumStore::IsVNArrLen(ValueNum vn)
ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN)
{
assert(arg0VN == VNNormVal(arg0VN));
+
+ // If the math intrinsic is not implemented by target-specific instructions, but is instead implemented
+ // by user calls, then don't do constant folding on it. This minimizes precision loss.
+
if (IsVNConstant(arg0VN) && Compiler::IsTargetIntrinsic(gtMathFN))
{
- // If the math intrinsic is not implemented by target-specific instructions, such as implemented
- // by user calls, then don't do constant folding on it. This minimizes precision loss.
- // I *may* need separate tracks for the double/float -- if the intrinsic funcs have overloads for these.
- double arg0Val = GetConstantDouble(arg0VN);
+ assert(varTypeIsFloating(TypeOfVN(arg0VN)));
- double res = 0.0;
- switch (gtMathFN)
- {
- case CORINFO_INTRINSIC_Sin:
- res = sin(arg0Val);
- break;
- case CORINFO_INTRINSIC_Cos:
- res = cos(arg0Val);
- break;
- case CORINFO_INTRINSIC_Sqrt:
- res = sqrt(arg0Val);
- break;
- case CORINFO_INTRINSIC_Abs:
- res = fabs(arg0Val); // The result and params are doubles.
- break;
- case CORINFO_INTRINSIC_Round:
- res = FloatingPointUtils::round(arg0Val);
- break;
- default:
- unreached(); // the above are the only math intrinsics at the time of this writing.
- }
if (typ == TYP_DOUBLE)
{
+ // Both the operand and its result must be of the same floating-point type.
+ assert(typ == TypeOfVN(arg0VN));
+ double arg0Val = GetConstantDouble(arg0VN);
+
+ double res = 0.0;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ res = sin(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ res = cos(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ res = sqrt(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ res = fabs(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Round:
+ res = FloatingPointUtils::round(arg0Val);
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+
return VNForDoubleCon(res);
}
else if (typ == TYP_FLOAT)
{
- return VNForFloatCon(float(res));
+ // Both the operand and its result must be of the same floating-point type.
+ assert(typ == TypeOfVN(arg0VN));
+ float arg0Val = GetConstantSingle(arg0VN);
+
+ float res = 0.0f;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ res = sinf(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ res = cosf(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ res = sqrtf(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ res = fabsf(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Round:
+ res = FloatingPointUtils::round(arg0Val);
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+
+ return VNForFloatCon(res);
}
else
{
+ // CORINFO_INTRINSIC_Round is currently the only intrinsic that takes floating-point arguments
+ // and that returns a non-floating-point result.
+
assert(typ == TYP_INT);
assert(gtMathFN == CORINFO_INTRINSIC_Round);
- return VNForIntCon(int(res));
+ int res = 0;
+
+ switch (TypeOfVN(arg0VN))
+ {
+ case TYP_DOUBLE:
+ {
+ double arg0Val = GetConstantDouble(arg0VN);
+ res = int(FloatingPointUtils::round(arg0Val));
+ break;
+ }
+ case TYP_FLOAT:
+ {
+ float arg0Val = GetConstantSingle(arg0VN);
+ res = int(FloatingPointUtils::round(arg0Val));
+ break;
+ }
+ default:
+ unreached();
+ }
+
+ return VNForIntCon(res);
}
}
else
@@ -7388,11 +7637,9 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
case CORINFO_HELP_READYTORUN_STATIC_BASE:
vnf = VNF_ReadyToRunStaticBase;
break;
-#if COR_JIT_EE_VERSION > 460
case CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE:
vnf = VNF_ReadyToRunGenericStaticBase;
break;
-#endif // COR_JIT_EE_VERSION > 460
case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
vnf = VNF_GetsharedGcstaticBaseDynamicclass;
break;
@@ -7466,6 +7713,10 @@ VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
vnf = VNF_IsInstanceOf;
break;
+ case CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE:
+ vnf = VNF_TypeHandleToRuntimeType;
+ break;
+
case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
vnf = VNF_ReadyToRunIsInstanceOf;
break;
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
index e6e0e43a33..2be48491df 100644
--- a/src/jit/valuenum.h
+++ b/src/jit/valuenum.h
@@ -205,6 +205,7 @@ private:
int GetConstantInt32(ValueNum argVN);
INT64 GetConstantInt64(ValueNum argVN);
double GetConstantDouble(ValueNum argVN);
+ float GetConstantSingle(ValueNum argVN);
// Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
// Assumes that "vnf" is a operator of the appropriate arity (unary for the first, binary for the second).
@@ -536,6 +537,17 @@ public:
// Returns true iff the VN represents an integeral constant.
bool IsVNInt32Constant(ValueNum vn);
+ struct ArrLenUnsignedBoundInfo
+ {
+ unsigned cmpOper;
+ ValueNum vnIdx;
+ ValueNum vnLen;
+
+ ArrLenUnsignedBoundInfo() : cmpOper(GT_NONE), vnIdx(NoVN), vnLen(NoVN)
+ {
+ }
+ };
+
struct ArrLenArithBoundInfo
{
// (vnArr.len - 1) > vnOp
@@ -606,6 +618,9 @@ public:
// If "vn" is constant bound, then populate the "info" fields for constVal, cmpOp, cmpOper.
void GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info);
+ // If "vn" is of the form "(uint)var < (uint)a.len" (or equivalent) return true.
+ bool IsVNArrLenUnsignedBound(ValueNum vn, ArrLenUnsignedBoundInfo* info);
+
// If "vn" is of the form "var < a.len" or "a.len <= var" return true.
bool IsVNArrLenBound(ValueNum vn);
@@ -663,9 +678,13 @@ private:
__fallthrough;
case TYP_BYREF:
-#ifndef PLATFORM_UNIX
+
+#ifdef _MSC_VER
+
assert(&typeid(T) == &typeid(size_t)); // We represent ref/byref constants as size_t's.
-#endif // PLATFORM_UNIX
+
+#endif // _MSC_VER
+
__fallthrough;
case TYP_INT:
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
index cb99507921..2711b4f056 100644
--- a/src/jit/valuenumfuncs.h
+++ b/src/jit/valuenumfuncs.h
@@ -34,6 +34,7 @@ ValueNumFuncDef(CastClass, 2, false, false, false) // Args: 0: Handle o
ValueNumFuncDef(IsInstanceOf, 2, false, false, false) // Args: 0: Handle of class being queried, 1: object being queried.
ValueNumFuncDef(ReadyToRunCastClass, 2, false, false, false) // Args: 0: Helper stub address, 1: object being cast.
ValueNumFuncDef(ReadyToRunIsInstanceOf, 2, false, false, false) // Args: 0: Helper stub address, 1: object being queried.
+ValueNumFuncDef(TypeHandleToRuntimeType, 1, false, false, false) // Args: 0: TypeHandle to translate
ValueNumFuncDef(LdElemA, 3, false, false, false) // Args: 0: array value; 1: index value; 2: type handle of element.